# Análise de datos con PANDAS

In [2]:
import pandas as pd

### Apertura do CSV

In [3]:
df = pd.DataFrame()

df = pd.read_csv('datos_covid2021_3paises.csv')
#df.to_csv?
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1095 entries, 0 to 1094
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  1095 non-null   int64 
 1   dia         1095 non-null   object
 2   pais        1095 non-null   object
 3   contaxios   1095 non-null   int64 
 4   mortes      1095 non-null   int64 
dtypes: int64(3), object(2)
memory usage: 42.9+ KB


In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,dia,pais,contaxios,mortes
0,0,2021-01-01,Spain,18047,148
1,1,2021-01-02,Spain,0,0
2,2,2021-01-03,Spain,0,0
3,3,2021-01-04,Spain,30579,241
4,4,2021-01-05,Spain,23700,352


In [5]:
# Mostrar los valores donde los contagios y las muertes son 0
df[(df['contaxios'] == 0) & (df['mortes'] == 0)]

Unnamed: 0.1,Unnamed: 0,dia,pais,contaxios,mortes
1,1,2021-01-02,Spain,0,0
2,2,2021-01-03,Spain,0,0
5,5,2021-01-06,Spain,0,0
8,8,2021-01-09,Spain,0,0
9,9,2021-01-10,Spain,0,0
...,...,...,...,...,...
987,987,2021-09-15,France,0,0
992,992,2021-09-20,France,0,0
1015,1015,2021-10-13,France,0,0
1060,1060,2021-11-27,France,0,0



### Limpia valores nulos e/ou inválidos/vacíos.

In [6]:
# Quitamos las filas donde sean 0
df = df[~((df['contaxios'] == 0) & (df['mortes'] == 0))]

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 958 entries, 0 to 1094
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  958 non-null    int64 
 1   dia         958 non-null    object
 2   pais        958 non-null    object
 3   contaxios   958 non-null    int64 
 4   mortes      958 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 44.9+ KB


### 1. Calcula a media de novos casos ao día

In [8]:
df['contaxios'].mean()

13823.913361169103

### 2. Calcula a media de mortes ao día

In [9]:
df['mortes'].mean()

112.01670146137788

### 3. Calcula o número máximo de casos

In [10]:
df['contaxios'].max()

232200

### 4. Calcula o número medio de casos separado por grupos

In [14]:
grupos = df.groupby('pais')[['contaxios', 'mortes']].mean().reset_index()
grupos

Unnamed: 0,pais,contaxios,mortes
0,France,22353.522255,166.893175
1,Portugal,2681.482094,33.146006
2,Spain,18359.666667,151.306202


### 5. Existe unha correlación entre número de novos casos e mortes?

In [15]:
grupos['ratio'] = grupos['mortes'] / grupos['contaxios']
grupos

Unnamed: 0,pais,contaxios,mortes,ratio
0,France,22353.522255,166.893175,0.007466
1,Portugal,2681.482094,33.146006,0.012361
2,Spain,18359.666667,151.306202,0.008241


A mayor número de contagios por país parece que el ratio de muertes por contagio disminuye.

### 6. No caso de que exista unha relación, en que país é maior?

In [None]:
grupos.iloc[[grupos['ratio'].idxmax()]]

Unnamed: 0,pais,contaxios,mortes,ratio
1,Portugal,2681.482094,33.146006,0.012361


El país con mayor ratio de muertes es Portugal

### 7. Cal é a mediana de casos confirmados en Portugal?

In [18]:
df[df['pais'] == 'Portugal']['contaxios'].median()

1190.0

### 8. Cales son os valores dos percentís Q1 e Q3 de Francia para o número de contaxios

In [None]:
df[df['pais'] == 'France']['contaxios'].quantile([0.25, 0.75])

0.25     5095.0
0.75    26079.0
Name: contaxios, dtype: float64

### 9. Cantos valores diferentes hai para o número de contaxios nun día en cada un dos países

In [26]:
# Cuantos valores difrentes tienen en el numero de contagios los 3 paises
df.groupby('pais')['contaxios'].nunique()

pais
France      332
Portugal    345
Spain       250
Name: contaxios, dtype: int64

### 10. Valor mínimo e máximo de contaxios por día para cada país

In [None]:
df.groupby('pais')['contaxios'].agg(['min', 'max'])

Unnamed: 0_level_0,min,max
pais,Unnamed: 1_level_1,Unnamed: 2_level_1
France,0,232200
Portugal,0,30829
Spain,0,214619
