# Pandas DataFrame

In [41]:
import pandas as pd

In [72]:
# Build a small DataFrame by hand: each keyword becomes a column name and
# each list supplies that column's values (all lists have the same length).
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/4/2017'],
    temp=[32, 35, 36],
    windspeed=[6, 7, 2],
    event=['Rain', 'Sunny', 'Snow'],
)

df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temp,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/4/2017,36,2,Snow


## Read CSV File

In [69]:
# Load the weather CSV into a DataFrame and display it.
# NOTE(review): this is an absolute path under /content/drive, presumably a
# mounted Google Drive in Colab — confirm the mount before running elsewhere.
weather_data = "/content/drive/MyDrive/AI ML LAB/dataframe basics/weather_data.csv"
df = pd.read_csv(filepath_or_buffer=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


## Basic Operations on DF

In [70]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape

(6, 4)

In [44]:
# Preview the first rows of the frame (head() returns 5 rows by default)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [45]:
# Preview the last rows of the frame (tail() returns 5 rows by default)
df.tail()

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [46]:
# Rows at positions 1 and 2 (the end of the slice is exclusive).
# .iloc states explicitly that this is a positional row selection; plain
# df[...] is otherwise used for column selection, so df[1:3] is easy to misread.
df.iloc[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [47]:
# Column labels of the frame, returned as a pandas Index
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [48]:
# Selecting a single column by name yields a Series, not a DataFrame
type(df["day"])

pandas.core.series.Series

In [49]:
# Passing a list of column names selects several columns at once
df[["day", "temperature"]]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


In [50]:
# Highest value in the temperature column
df["temperature"].max()

35

## Filtering and Masks

In [51]:
# Boolean mask: True for the rows whose temperature is above 32.
# Indexing the frame with the mask keeps only those rows.
is_above_32 = df["temperature"] > 32
df[is_above_32]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [52]:
# Keep only the row(s) whose temperature equals the column maximum
hottest = df["temperature"].max()
df[df["temperature"] == hottest]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [53]:
# Day(s) on which the maximum temperature was recorded.
# One .loc[row_mask, column] lookup selects the rows and the column in a
# single step, instead of chaining two [] selections; the result is the
# same "day" Series.
df.loc[df["temperature"] == df["temperature"].max(), "day"]

1    1/2/2017
Name: day, dtype: object

## Stats

In [54]:
# Sample standard deviation of the temperature column (pandas defaults to ddof=1)
df["temperature"].std()

3.8297084310253524

In [55]:
# max() also works on a string column: it returns the value that sorts last
df["event"].max()

'Sunny'

In [56]:
# df["event"].mean()
# Left commented out on purpose: this raises an error, because a mean
# cannot be computed for a column of strings.

In [57]:
# Summary statistics (count, mean, std, min, quartiles, max); only the
# numeric columns are included
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


## Modifying Index

In [58]:
# Use the "day" column as the index. This returns a new DataFrame for
# display; df itself is left unchanged because the result is not assigned.
df.set_index("day")

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [62]:
# Make "day" the index of df for the cells that follow.
# The result is assigned back to df rather than using inplace=True, which
# matches the df = df.set_index("event") form used later in this notebook.
df = df.set_index("day")

In [63]:
# Display df again to confirm that "day" is now the index
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [64]:
# Label-based row lookup: with "day" as the index, .loc returns the row
# for that day as a Series
df.loc['1/1/2017']

temperature      32
windspeed         6
event          Rain
Name: 1/1/2017, dtype: object

In [65]:
# Move "day" back into a regular column and restore the default integer index.
# The result is assigned back to df rather than using inplace=True.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [66]:
# Index the frame by "event" instead. The event labels repeat (for example
# there are two "Snow" rows), so the resulting index is not unique.
df = df.set_index("event")
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [67]:
# The "Snow" label appears more than once in the index, so .loc returns
# every matching row rather than a single row
df.loc["Snow"]

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
