### Data Manipulation and Analysis with Pandas

Data manipulation and analysis are key tasks in any data science or data analysis project. Pandas provides a wide range of functions for data manipulation and analysis, making it easier to clean, transform, and extract insights from data. In this lesson, we will cover various data manipulation and analysis techniques using Pandas. 

In [2]:
import pandas as pd

## load the example dataset into a DataFrame
df = pd.read_csv('example.csv')

## preview the first 5 rows (head() defaults to n=5)

df.head()

Unnamed: 0,Name,Age,Number,Zip_code
0,Hola,12,89,45
1,Faaa,87,9,6
2,Alo,7,5,8
3,Acaaa,8,6,8
4,Hola,12,89,45


In [3]:
## fetch the last 5 rows
df.tail(5)

Unnamed: 0,Name,Age,Number,Zip_code
59,Acaaa,8,6,8
60,Hola,12,89,45
61,Faaa,87,9,6
62,Alo,7,5,8
63,Acaaa,8,6,8


In [4]:
## summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Age,Number,Zip_code
count,64.0,64.0,64.0
mean,28.5,27.25,16.75
std,34.094174,35.963827,16.459667
min,7.0,5.0,6.0
25%,7.75,5.75,7.5
50%,10.0,7.5,8.0
75%,30.75,29.0,17.25
max,87.0,89.0,45.0


In [6]:
## data type of each column
df.dtypes

Name        object
Age          int64
Number       int64
Zip_code     int64
dtype: object

In [11]:
## Handling missing values
## count the missing entries in each column
df.isna().sum()

Name        0
Age         0
Number      0
Zip_code    0
dtype: int64

In [13]:
## replace every missing value with 0; fillna returns a new DataFrame, so df itself is unchanged
df_filled = df.fillna(value=0)

In [14]:
## filling missing values with the mean of the column
## (inspect 'Name' first: it is a string column, dtype object, so it has no mean to fill with)

df['Name']

0      Hola
1      Faaa
2       Alo
3     Acaaa
4      Hola
      ...  
59    Acaaa
60     Hola
61     Faaa
62      Alo
63    Acaaa
Name: Name, Length: 64, dtype: object

In [16]:
## fill missing ages with the mean of 'Age' and store the result in a new column
df['Age_fillNA'] = df['Age'].fillna(value=df['Age'].mean())
df

Unnamed: 0,Name,Age,Number,Zip_code,Age_fillNA
0,Hola,12,89,45,12
1,Faaa,87,9,6,87
2,Alo,7,5,8,7
3,Acaaa,8,6,8,8
4,Hola,12,89,45,12
...,...,...,...,...,...
59,Acaaa,8,6,8,8
60,Hola,12,89,45,12
61,Faaa,87,9,6,87
62,Alo,7,5,8,7


In [17]:
## re-check the column dtypes now that 'Age_fillNA' has been added
df.dtypes

Name          object
Age            int64
Number         int64
Zip_code       int64
Age_fillNA     int64
dtype: object

In [20]:
## Renaming columns
## rename() returns a new DataFrame, so the result is assigned back to df

df = df.rename({'Name': 'Full_Name'}, axis='columns')
df.head()

Unnamed: 0,Full_Name,Age,Number,Zip_code,Age_fillNA
0,Hola,12,89,45,12
1,Faaa,87,9,6,87
2,Alo,7,5,8,7
3,Acaaa,8,6,8,8
4,Hola,12,89,45,12


In [21]:
## Changing data types: store a float copy of 'Zip_code' in a new column
df['Value_new'] = df['Zip_code'].astype('float64')
df.head()

Unnamed: 0,Full_Name,Age,Number,Zip_code,Age_fillNA,Value_new
0,Hola,12,89,45,12,45.0
1,Faaa,87,9,6,87,6.0
2,Alo,7,5,8,7,8.0
3,Acaaa,8,6,8,8,8.0
4,Hola,12,89,45,12,45.0


In [None]:
## Applying a function to a column: square each 'Zip_code' value.
## A second assignment that overwrote this column with the booleans from `x == 2`
## was removed; it contradicted the squared values this cell is meant to show.
## (The vectorized form df['Zip_code'] ** 2 gives the same result without apply.)
df['New Value'] = df['Zip_code'].apply(lambda x: x ** 2)
df

Unnamed: 0,Full_Name,Age,Number,Zip_code,Age_fillNA,Value_new,New Value
0,Hola,12,89,45,12,45.0,2025
1,Faaa,87,9,6,87,6.0,36
2,Alo,7,5,8,7,8.0,64
3,Acaaa,8,6,8,8,8.0,64
4,Hola,12,89,45,12,45.0,2025
...,...,...,...,...,...,...,...
59,Acaaa,8,6,8,8,8.0,64
60,Hola,12,89,45,12,45.0,2025
61,Faaa,87,9,6,87,6.0,36
62,Alo,7,5,8,7,8.0,64


In [None]:
### Data aggregating and Grouping
## average 'Number' within each 'Full_Name' group

grouped_mean = df.groupby(by='Full_Name')['Number'].mean()
print(grouped_mean)


Full_Name
Acaaa     6.0
Alo       5.0
Faaa      9.0
Hola     89.0
Name: Number, dtype: float64


In [27]:
## group by two keys and total 'Number' within each (Full_Name, Zip_code) pair
grouped_two = df.groupby(by=['Full_Name', 'Zip_code'])['Number'].sum()
grouped_two

Full_Name  Zip_code
Acaaa      8             96
Alo        8             80
Faaa       6            144
Hola       45          1424
Name: Number, dtype: int64

In [30]:
## same two-key grouping, this time averaging 'Number' (rebinds grouped_two)
grouped_two = df.groupby(by=['Full_Name', 'Zip_code'])['Number'].mean()
grouped_two

Full_Name  Zip_code
Acaaa      8            6.0
Alo        8            5.0
Faaa       6            9.0
Hola       45          89.0
Name: Number, dtype: float64

In [33]:
### aggregate multiple functions

grouped_agg = df.groupby('Full_Name')['Number'].agg(['mean','sum','count'])
grouped_agg

Unnamed: 0_level_0,mean,sum,count
Full_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acaaa,6.0,96,16
Alo,5.0,80,16
Faaa,9.0,144,16
Hola,89.0,1424,16
