# <font color="purple"><h3 align="center">DataFrame Basics Tutorial</h3></font>

## **A DataFrame is the most commonly used object in pandas. It is a table-like data structure containing rows and columns, similar to an Excel spreadsheet.**

In [1]:
import pandas as pd
from pandas import DataFrame
# Sample weather observations: one list per column, one entry per day.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny'],
)
# Each dict key becomes a column; rows get a default 0..5 integer index.
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [2]:
# shape is a (rows, columns) tuple; it can be unpacked: rows, columns = df.shape
df.shape

(6, 4)

In [3]:
# Positional row slice: rows at positions 2, 3 and 4 (the end bound is excluded).
newdf = df.iloc[2:5]
newdf

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny


## <font color='blue'>Rows</font>

In [7]:
# First five rows (n=5 is the default); pass a smaller n, e.g. df.head(3), for fewer.
df.head(n=5)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny


In [9]:
# Last five rows (n=5 is the default); pass a smaller n, e.g. df.tail(2), for fewer.
df.tail(n=5)

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [12]:
# Filter the index with a boolean mask on "day" and take the first matching label.
df.index[df["day"] == "1/4/2017"][0]

3

In [13]:
# Convert the matching index label to a plain Python int so it can be used in slice arithmetic.
startindex = int(df.index[df["day"] == "1/4/2017"][0])
type(startindex)

int

In [15]:
# Three consecutive rows beginning at the located position.
df.iloc[startindex:startindex + 3]

Unnamed: 0,day,temperature,windspeed,event
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


## <font color='blue'>Columns</font>

In [17]:
# df.columns is an Index object; convert it to a plain list of column names.
print(df.columns)
columnNames = list(df.columns)
print(columnNames)

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')
['day', 'temperature', 'windspeed', 'event']


In [18]:
# Select a single column as a Series (bracket form of df.event).
df["event"]

0     Rain
1    Sunny
2     Snow
3     Snow
4    Sunny
5    Sunny
Name: event, dtype: object

In [19]:
# Column values as a plain Python list.
eventlist = df["event"].tolist()
eventlist

['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny']

In [23]:
# Keep all rows but only the listed columns; the result is a new DataFrame.
twodf = df.loc[:, ['day', 'event']]
twodf

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Sunny
5,1/6/2017,Sunny


In [24]:
# Same two-column selection as above, displayed directly without naming the result.
df.loc[:, ["day", "event"]]

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Sunny
5,1/6/2017,Sunny


## <font color='blue'>Operations On DataFrame</font>

In [26]:
# Mean and standard deviation of the temperature column.
print(df["temperature"].mean())
print(df.temperature.std())

30.333333333333332
3.8297084310253524


In [20]:
# Number of rows whose temperature is above 30: sum the boolean mask (True counts as 1).
int((df["temperature"] > 30).sum())

4

In [48]:
# SQL-like query: SELECT day WHERE temperature = MIN(temperature).
# A single .loc call takes the row mask and the column together.
df.loc[df['temperature'] == df['temperature'].min(), 'day']

3    1/4/2017
Name: day, dtype: object

In [49]:
# Aggregate over a column, similar to SELECT AVG(temperature) in SQL.
df.temperature.mean()

30.333333333333332

In [27]:
# Standard deviation of the temperature column.
df.temperature.std()

3.8297084310253524

In [51]:
# max() works on a string column (strings compare alphabetically),
# but mean() won't work since the data type is string.
df.event.max()

'Sunny'

In [25]:
# Get the most frequently occurring element

In [28]:
# Occurrences of each distinct event, most frequent first.
df["event"].value_counts()

Sunny    3
Snow     2
Rain     1
Name: event, dtype: int64

In [56]:
# The counts are sorted in descending order, so the first index label is the most frequent event.
df["event"].value_counts().index[0]

'Sunny'

In [55]:
# Count of the most frequent event. The value_counts() result is labelled by
# event name, so take the first entry by position with .iloc; a bare [0] on a
# string-labelled Series relies on a deprecated integer-as-position fallback.
df.event.value_counts().iloc[0]

3

In [57]:
# Compute the counts once and reuse them for both the label and the count.
event_counts = df.event.value_counts()
# index[0] is the most frequent event's name; .iloc[0] is its count by position
# (a bare [0] on this string-labelled Series is a deprecated positional fallback).
print("Max Event : " + event_counts.index[0] + " and it occured " + str(event_counts.iloc[0]) + " times")

Max Event : Sunny and it occured 3 times


In [59]:
# Summary statistics (count, mean, std, min, quartiles, max); as the result
# shows, only the numeric columns temperature and windspeed are included.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


**Search for "pandas Series operations" to find the full list of available operations, or see the API reference:**
https://pandas.pydata.org/docs/reference/api/pandas.Series.html

## <font color='blue'>set_index</font>

In [34]:
# Make the "day" column the row index. inplace=True modifies df itself, so this
# cell is not re-runnable: once "day" is the index it is no longer a column.
df.set_index("day", inplace=True)

In [35]:
# Display df: "day" is now the index rather than a regular column.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [38]:
# Label-based slice on the "day" index; unlike positional slicing, the end
# label ("1/4/2017") is included in the result.
df.loc["1/1/2017" : "1/4/2017"]

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow


In [32]:
# The index is not counted as a column, so the shape is now (6, 3) instead of (6, 4).
df.shape

(6, 3)

In [39]:
# Undo set_index: "day" becomes a regular column again and the rows get the
# default 0..n-1 integer index. inplace=True modifies df itself.
df.reset_index(inplace=True)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [40]:
# Display df again; nothing has changed since the reset_index cell, so the output is identical.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [41]:
# Plain assignment does NOT copy: newdf is a second name for the same DataFrame
# object as df. Use df.copy() when an independent frame is wanted.
newdf = df 

# Because newdf and df are the same object, this in-place change is also visible through df.
newdf.set_index("day" , inplace=True)

In [42]:
# Display newdf, now indexed by "day".
newdf

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [69]:
# NOTE(review): the saved output of this cell shows "day" as a regular column
# again, which suggests an index reset was run in a cell that is no longer in
# the notebook — confirm with Restart & Run All.
newdf

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [70]:
# Move the "day" column into the index, modifying newdf in place.
# NOTE(review): this needs "day" to be a column when the cell runs, but an
# earlier cell already made it the index — verify on a top-to-bottom run.
newdf.set_index("day" , inplace=True)

In [71]:
# Display newdf, indexed by "day".
newdf

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [72]:
# Replace the current index with the "event" column. As the result shows, the
# previous "day" index is discarded rather than restored as a column, and the
# new index contains repeated labels (Sunny, Snow).
newdf.set_index("event", inplace=True)
newdf

Unnamed: 0_level_0,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1
Rain,32,6
Sunny,35,7
Snow,28,2
Snow,24,7
Sunny,32,4
Sunny,31,2


In [74]:
# Add a running 0..n-1 counter column. range(len(df)) sizes it from the frame
# itself, so there is no hard-coded row count and no dependency on numpy
# (which this notebook never imports).
df["NewData"] = range(len(df))
# Turn the current index back into a regular column, restoring the default
# integer index (modifies df in place).
df.reset_index(inplace=True)
df

Unnamed: 0,event,temperature,windspeed,NewData
0,Rain,32,6,0
1,Sunny,35,7,1
2,Snow,28,2,2
3,Snow,24,7,3
4,Sunny,32,4,4
5,Sunny,31,2,5


In [75]:
# Remove the "event" column from df in place (columns= is the keyword form of axis=1).
df.drop(columns="event", inplace=True)

In [76]:
# Display df after dropping "event": temperature, windspeed and NewData remain.
df

Unnamed: 0,temperature,windspeed,NewData
0,32,6,0
1,35,7,1
2,28,2,2
3,24,7,3
4,32,4,4
5,31,2,5


In [78]:
# Inspect the row index; after reset_index it is the default RangeIndex 0..5.
df.index

RangeIndex(start=0, stop=6, step=1)

In [79]:
# Use "temperature" as the index (in place). The column contains 32 twice, so
# the resulting index is not unique.
df.set_index("temperature", inplace=True)

In [80]:
# Display df, now indexed by temperature.
df

Unnamed: 0_level_0,windspeed,NewData
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
32,6,0
35,7,1
28,2,2
24,7,3
32,4,4
31,2,5


In [82]:
# Label lookup (not a position): 32 occurs twice in the index, so both
# matching rows are returned as a DataFrame.
df.loc[32]

Unnamed: 0_level_0,windspeed,NewData
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
32,6,0
32,4,4


In [83]:
# First five rows of the temperature-indexed frame (n=5 is the default).
df.head(n=5)

Unnamed: 0_level_0,windspeed,NewData
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
32,6,0
35,7,1
28,2,2
24,7,3
32,4,4


In [86]:
# Rebuild the original weather table from scratch, discarding the earlier edits to df.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny'],
)
# Indexing by event works like a lookup table keyed on the event name;
# unlike a hash map, the same key may appear on several rows.
df = pd.DataFrame(weather_data).set_index('event')
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Sunny,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [87]:
# Display df again; it is unchanged since the previous cell, so the output is identical.
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Sunny,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [5]:
# Build a small DataFrame interactively: ask for the column names first, then
# collect one value per column for every row.
num = int(input("please enter the number of columns: "))
columns = [input("please enter the column name: ") for _ in range(num)]

# One empty list per column; values are appended row by row below.
data = {name: [] for name in columns}

rows = int(input("please enter the number of rows: "))
for _ in range(rows):
    for name in data:
        # input() returns text, so every value is stored as a string.
        data[name].append(input(f"please enter the value of {name}: "))

dataframe = pd.DataFrame(data)
dataframe

please enter the number of columns2
please enter the column name: Name
please enter the column name: Age
please enter the number of rows: 3
please enter the value of Name: Ahmed
please enter the value of Age: 23
please enter the value of Name: Ali
please enter the value of Age: 20
please enter the value of Name: Hesham
please enter the value of Age: 47


Unnamed: 0,Name,Age
0,Ahmed,23
1,Ali,20
2,Hesham,47
