# Pandas with Python

# A DataFrame is the most commonly used object in pandas. It is a table-like data structure containing rows and columns, similar to an Excel spreadsheet.

In [5]:
import pandas as pd
# A DataFrame can also be built from a dictionary of equal-length lists
# (one key per column) instead of being read from a CSV file.
weather_data = {
    'day': ['1/1/2017','1/2/2017','1/3/2017','1/4/2017','1/5/2017','1/6/2017'],
    'temperature': [32,35,28,24,32,31],
    'windspeed': [6,7,2,7,4,2],
    'event': ['Rain', 'Sunny', 'Snow','Snow','Rain', 'Sunny']
}
# Keep the dictionary-built frame under its own name: binding it to `df` would
# be a dead store, because `df` is rebound to the CSV contents right below.
df_from_dict = pd.DataFrame(weather_data)

# `df` is the frame loaded from file5.csv; it is what this cell displays.
df = pd.read_csv("file5.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,Rain
1,01/02/17,35,7,Sunny
2,01/03/17,28,2,Snow
3,01/04/17,24,7,Snow
4,01/05/17,32,4,Rain
5,01/06/17,31,2,Sunny


In [7]:
# df.shape is a (row_count, column_count) tuple; unpack it into two names.
rows, columns = df.shape

In [8]:
# Number of columns, as unpacked from df.shape.
columns

4

In [9]:
# Number of rows, as unpacked from df.shape.
rows

6

In [10]:
df.head(3)  # show only the first three rows

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,Rain
1,01/02/17,35,7,Sunny
2,01/03/17,28,2,Snow


In [11]:
df.tail()  # with no argument, tail() shows the last five rows

Unnamed: 0,day,temperature,windspeed,event
1,01/02/17,35,7,Sunny
2,01/03/17,28,2,Snow
3,01/04/17,24,7,Snow
4,01/05/17,32,4,Rain
5,01/06/17,31,2,Sunny


In [12]:
# A [start:stop] slice selects rows by position; stop is excluded,
# so this returns the rows at positions 1 and 2.
df[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,01/02/17,35,7,Sunny
2,01/03/17,28,2,Snow


In [13]:
df.columns  # the column labels of the DataFrame

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [14]:
df['day']  # select one column by label; df.day is the attribute-style equivalent

0    01/01/17
1    01/02/17
2    01/03/17
3    01/04/17
4    01/05/17
5    01/06/17
Name: day, dtype: object

In [18]:
# A single selected column is a pandas Series, not a DataFrame.
type(df['day'])

pandas.core.series.Series

In [19]:

df[['day','temperature']]  # to select several columns, pass a list of names (hence the double square brackets)

Unnamed: 0,day,temperature
0,01/01/17,32
1,01/02/17,35
2,01/03/17,28
3,01/04/17,24
4,01/05/17,32
5,01/06/17,31


In [20]:
df['temperature'].max()  # df['temperature'] selects the temperature column; .max() then returns its largest value

35

In [21]:
df['temperature'].mean()  # arithmetic mean of the temperature column

30.333333333333332

In [22]:
df['temperature'].min()  # smallest value in the temperature column

24

In [23]:
df['temperature'].std()  # sample standard deviation (pandas divides by n - 1 by default)

3.8297084310253524

In [26]:
df.describe()  # summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [27]:
# Boolean-mask filter: keep only the rows whose temperature is above 32.
df.loc[df['temperature'] > 32]

Unnamed: 0,day,temperature,windspeed,event
1,01/02/17,35,7,Sunny


In [28]:
# SQL-like lookup in one .loc call: rows where temperature equals the column
# maximum, restricted to the 'day' column.
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    01/02/17
Name: day, dtype: object

In [29]:
# Same SQL-like filter, but keeping every column of the row(s) that hold
# the highest temperature.
df.loc[df['temperature'] == df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event
1,01/02/17,35,7,Sunny


In [30]:
df['event'].max()  # on a string column max() returns the alphabetically last value; mean() won't work since the data type is string

'Sunny'

In [31]:
df.describe()  # summary statistics again; 'event' is left out because it is a string column

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [33]:
df.index  # the default index: integer positions starting at 0

RangeIndex(start=0, stop=6, step=1)

In [35]:
# Make the 'day' column the row index. set_index returns a new DataFrame, so
# reassign the result to df instead of mutating the frame with inplace=True.
df = df.set_index('day')

In [36]:
# With 'day' as the index, rows can now be looked up by date label using .loc.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01/01/17,32,6,Rain
01/02/17,35,7,Sunny
01/03/17,28,2,Snow
01/04/17,24,7,Snow
01/05/17,32,4,Rain
01/06/17,31,2,Sunny


In [38]:
df.index  # the index now holds the 'day' values instead of integer positions


Index(['01/01/17', '01/02/17', '01/03/17', '01/04/17', '01/05/17', '01/06/17'], dtype='object', name='day')

In [42]:
df.loc['01/04/17']  # returns this particular row, since the dates now act as the index

temperature      24
windspeed         7
event          Snow
Name: 01/04/17, dtype: object

In [43]:
# Restore the default integer index; 'day' becomes an ordinary column again.
# reset_index returns a new DataFrame, so reassign it rather than using inplace=True.
df = df.reset_index()
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,01/01/17,32,6,Rain
1,01/02/17,35,7,Sunny
2,01/03/17,28,2,Snow
3,01/04/17,24,7,Snow
4,01/05/17,32,4,Rain


In [44]:
# Using 'event' as the index is a bit like building a hash map keyed by event.
# Unlike a hash map, the labels are not unique: several rows share one event.
# Reassign the returned frame instead of mutating with inplace=True.
df = df.set_index('event')
df


Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,01/01/17,32,6
Sunny,01/02/17,35,7
Snow,01/03/17,28,2
Snow,01/04/17,24,7
Rain,01/05/17,32,4
Sunny,01/06/17,31,2


In [45]:
# Index labels need not be unique: .loc returns every row labelled 'Snow'.
df.loc['Snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,01/03/17,28,2
Snow,01/04/17,24,7


# DEALING WITH MISSING DATA