# Read Tabular Data using Pandas

## Bad method: reading the file as raw text

In [None]:
# Naive approach: read the whole file into one string.
# The encoding is pinned so decoding does not depend on the platform's locale
# default (and matches pandas.read_csv, which decodes as UTF-8 by default).
with open('weather_data.csv', encoding='utf-8') as f:
    data = f.read()

In [None]:
# Echo the raw text: a single long string with embedded '\n' separators and
# no notion of rows or columns.
data

'day,temp,condition\nMonday,12,Sunny\nTuesday,14,Rain\nWednesday,15,Rain\nThursday,14,Cloudy\nFriday,21,Sunny\nSaturday,22,Sunny\nSunday,24,Sunny'

## Good method: loading the file with pandas

In [1]:
import pandas as pd

In [None]:
# Parse the CSV into a DataFrame; by default the first line of the file
# supplies the column names.
df = pd.read_csv('weather_data.csv')

In [2]:
# Alternative source: load the (presumably identical) weather table from an
# Excel workbook. This rebinds `df`; judging by the execution counts, it is the
# load the recorded outputs come from (the read_csv cell shows no count).
# NOTE(review): reading .xlsx needs an Excel engine (e.g. openpyxl) installed — confirm.
df = pd.read_excel('weather_data.xlsx')

In [3]:
# A bare expression on the last line of a cell is rendered as a rich table.
df

Unnamed: 0,day,temp,condition
0,Monday,12,Sunny
1,Tuesday,14,Rain
2,Wednesday,15,Rain
3,Thursday,14,Cloudy
4,Friday,21,Sunny
5,Saturday,22,Sunny
6,Sunday,24,Sunny


In [5]:
# print() emits the plain-text repr instead of the rich table shown for a bare `df`.
print(df)

         day  temp condition
0     Monday    12     Sunny
1    Tuesday    14      Rain
2  Wednesday    15      Rain
3   Thursday    14    Cloudy
4     Friday    21     Sunny
5   Saturday    22     Sunny
6     Sunday    24     Sunny


In [23]:
# Confirm what the reader function returned: a pandas DataFrame.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


## Pandas DataFrame

In [7]:
# First 3 rows of the table.
df.head(3)

Unnamed: 0,day,temp,condition
0,Monday,12,Sunny
1,Tuesday,14,Rain
2,Wednesday,15,Rain


In [8]:
# Last 3 rows of the table (original index labels are kept).
df.tail(3)

Unnamed: 0,day,temp,condition
4,Friday,21,Sunny
5,Saturday,22,Sunny
6,Sunday,24,Sunny


In [9]:
# (number of rows, number of columns) — an attribute, not a method call.
df.shape

(7, 3)

In [10]:
# Data type of each column; the text columns are reported as `object` here.
df.dtypes

Unnamed: 0,0
day,object
temp,int64
condition,object


In [11]:
# Summary statistics; by default only the numeric column (temp) is summarised.
df.describe()

Unnamed: 0,temp
count,7.0
mean,17.428571
std,4.755949
min,12.0
25%,14.0
50%,15.0
75%,21.5
max,24.0


## Pandas Series

In [12]:
# Selecting a single column by label yields a Series.
# NOTE(review): `tmp` presumably abbreviates "temperature", not "temporary"; a
# clearer name (e.g. `temps`) would need the later cells that use `tmp` renamed too.
tmp = df['temp']

In [13]:
# Display the Series: index labels alongside the temperature values.
tmp

Unnamed: 0,temp
0,12
1,14
2,15
3,14
4,21
5,22
6,24


In [22]:
# A single column is a pandas Series rather than a DataFrame.
print(type(tmp))

<class 'pandas.core.series.Series'>


In [15]:
# Summary statistics for this one column; the values match the temp column of df.describe().
tmp.describe()

Unnamed: 0,temp
count,7.0
mean,17.428571
std,4.755949
min,12.0
25%,14.0
50%,15.0
75%,21.5
max,24.0


In [21]:
# Highest temperature in the column.
tmp.max()

24

In [24]:
# Lowest temperature in the column.
tmp.min()

12

In [25]:
# Arithmetic mean; in the recorded run it comes back as a NumPy float (np.float64).
tmp.mean()

np.float64(17.428571428571427)

In [26]:
# Pull out the text column to see how describe() treats non-numeric data.
conditions = df['condition']

In [27]:
# For a text (object) column describe() reports count / unique / top / freq
# instead of the numeric statistics.
conditions.describe()

Unnamed: 0,condition
count,7
unique,3
top,Sunny
freq,4


## Conditional (Boolean) Indexing

In [28]:
# Element-wise comparison: produces a boolean Series with one True/False per row.
df['condition'] == 'Sunny'

Unnamed: 0,condition
0,True
1,False
2,False
3,False
4,True
5,True
6,True


In [29]:
# Keep only the rows where the mask is True, i.e. the sunny days.
# The mask is passed to .loc to make the row selection explicit.
df.loc[df['condition'] == 'Sunny']

Unnamed: 0,day,temp,condition
0,Monday,12,Sunny
4,Friday,21,Sunny
5,Saturday,22,Sunny
6,Sunday,24,Sunny


In [30]:
# Row(s) whose temperature equals the column maximum; ties would all be kept.
df.loc[df['temp'] == df['temp'].max()]

Unnamed: 0,day,temp,condition
6,Sunday,24,Sunny


In [31]:
# Days whose temperature is at or above the column mean.
df.loc[df['temp'] >= df['temp'].mean()]

Unnamed: 0,day,temp,condition
4,Friday,21,Sunny
5,Saturday,22,Sunny
6,Sunday,24,Sunny
