# <font color="purple"><h3 align="center">DataFrame Basics Tutorial</h3></font>

## **A DataFrame is the most commonly used object in pandas. It is a table-like data structure containing rows and columns, similar to an Excel spreadsheet.**

In [None]:
import pandas as pd
# Column-oriented input: each keyword is a column name, each list holds that
# column's values (all lists have the same length, one entry per row).
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)

# Turn the dict of columns into a DataFrame and display it.
weather_dataframe = pd.DataFrame(data=weather_data)
weather_dataframe


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# Load the weather table from a CSV file into df, then display it.
# NOTE(review): "/content/..." is an absolute path (it looks like a Google Colab
# location — confirm); the read fails wherever the file is not at exactly this path.
df= pd.read_csv("/content/weather_data.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# shape is a (rows, columns) tuple; it can be unpacked as: rows, columns = df.shape
df.shape

(6, 4)

## <font color='blue'>Rows</font>

In [None]:
# head(n) returns the first n rows; called with no argument it returns the first 5.
df.head(3)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [None]:
# tail(n) returns the last n rows; called with no argument it returns the last 5.
# Only the final expression of a cell is displayed, so this cell contains just
# the one call whose result we want to see.
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# Positional row slice: rows at positions 1, 2 and 3 (the end position 4 is excluded).
df.iloc[1:4]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow


## <font color='blue'>Columns</font>

In [None]:
# The column labels of the DataFrame, returned as a pandas Index.
# Left as the cell's final expression so the notebook displays it directly.
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')


Accessing the columns:

In [None]:
# Select a single column by label; attribute access (df.day) is an equivalent shorthand here.
df['day']

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [None]:
# A single selected column is a pandas Series, not a DataFrame.
type(df['day'])

pandas.core.series.Series

In [None]:
# A comma inside SINGLE brackets builds the tuple ('day', 'temperature'), which
# pandas looks up as one column label. No such column exists, so KeyError is raised.
# The error is caught and printed so that "Run All" can continue past this demonstration.
try:
    df['day', 'temperature']
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: ignored

In [None]:
# Passing a LIST of labels (note the double brackets) selects several columns and returns a DataFrame.
df[['day','temperature']]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


In [None]:
# A plain dictionary cannot be asked for several keys at once:
# weather_data["day", "temperature"] looks for the single tuple key
# ("day", "temperature"), which does not exist, so KeyError is raised.
# The error is caught and printed so that "Run All" can continue past this demonstration.
try:
    weather_data["day", "temperature"]
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: ignored

In [None]:
# A DataFrame can return several columns at once: pass a list of column labels
# (double brackets) and the result is a DataFrame with just those columns.
weather_dataframe[["day","temperature"]]


Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


## <font color='blue'>Operations On DataFrame</font>

In [None]:
# Highest value in the temperature column.
df['temperature'].max()

35

In [None]:
# Boolean filtering: keep every row of the DataFrame whose temperature is above 32.
df.loc[df['temperature'].gt(32)]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [None]:
# Similar to a SQL WHERE clause: return all columns of the row(s) whose
# temperature equals the column's maximum.
df.loc[df['temperature'].eq(df['temperature'].max())]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [None]:
# Similar to SQL "SELECT day ... WHERE temperature = MAX(temperature)":
# .loc[row_condition, column] picks the rows and the column in one step,
# which avoids chained indexing (df[col][condition]).
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [None]:
# Syntax: df.loc[row condition, columns to show]
# Selecting rows and columns in a single .loc call avoids chained indexing
# (df[columns][condition]).
df.loc[df["temperature"] == df["temperature"].max(), ["day", "temperature"]]

Unnamed: 0,day,temperature
1,1/2/2017,35


In [None]:
# Standard deviation of the temperature column (pandas uses the sample formula, dividing by n-1, by default).
df['temperature'].std()

3.8297084310253524

In [None]:
# max() also works on a string column: strings are compared alphabetically, so 'Sunny' is the largest value.
df['event'].max()

'Sunny'

In [None]:
# mean() is not defined for a string column, so pandas raises TypeError.
# The error is caught and printed so that "Run All" can continue past this demonstration.
try:
    df["event"].mean()
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: ignored

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns;
# the string columns day and event are left out.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


**Search for "pandas Series operations", or see the pandas Series API reference, for the list of all available operations:**
https://pandas.pydata.org/docs/reference/api/pandas.Series.html

## <font color='blue'>set_index</font>

In [None]:
# The index is the default integer range, 0 to 5.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# Returns a new DataFrame that uses the 'day' column as its index; df itself is not modified.
df.set_index('day')

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [None]:
# df still has its default integer index: the set_index('day') call above
# returned a new frame instead of changing df.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# By default set_index returns a re-indexed copy and leaves df untouched, so
# later operations on df would still see the old integer index.
df.set_index('day', inplace=True)

# To change the original DataFrame itself, pass inplace=True as the last argument (as done here).

In [None]:
# df itself is now indexed by day.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [None]:
# The index now holds the day labels (stored as strings, dtype object).
df.index

Index(['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'], dtype='object', name='day')

In [None]:
# With 'day' as the index, loc can fetch a row directly by its date label;
# the result holds that row's column values.
df.loc['1/2/2017']

temperature       35
windspeed          7
event          Sunny
Name: 1/2/2017, dtype: object

In [None]:
# Restore the default integer index; 'day' becomes an ordinary column again.
df.reset_index(inplace=True)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [None]:
# loc looks rows up by INDEX label. After reset_index the index is the integer
# range again, so the date string is no longer a label and KeyError is raised.
# The error is caught and printed so that "Run All" can continue past this demonstration.
try:
    df.loc['1/2/2017']
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: ignored

In [None]:
# Index the rows by event. This resembles keying a lookup table by event, but
# index labels need not be unique: each event here labels two rows.
df.set_index('event',inplace=True)
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [None]:
# A label that appears more than once returns every matching row, as a DataFrame.
df.loc['Snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
