# Dataframe Basics


A DataFrame is the main object in pandas. It represents data arranged in rows and columns (tabular data, similar to an Excel spreadsheet).

In [4]:
import pandas as pd
# Load the weather data from a CSV file; the relative path is resolved against the current working directory.
df = pd.read_csv("weather_data.csv")
# A bare expression on the last line of a cell is rendered as the cell's output.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2023,32,6,Rain
1,1/2/2023,35,7,Sunny
2,1/3/2023,28,2,Snow
3,1/4/2023,24,7,Snow
4,1/5/2023,32,4,Rain
5,1/6/2023,32,2,Sunny


In [5]:
# For a large DataFrame, df.head() shows just the first rows (five by default).
df.head()


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2023,32,6,Rain
1,1/2/2023,35,7,Sunny
2,1/3/2023,28,2,Snow
3,1/4/2023,24,7,Snow
4,1/5/2023,32,4,Rain


In [6]:
# Pass a row count to head() to choose how many leading rows are shown.
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2023,32,6,Rain
1,1/2/2023,35,7,Sunny


In [7]:
# df.tail() is the counterpart of head(): it returns the last rows (here, the last two).
df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2023,32,4,Rain
5,1/6/2023,32,2,Sunny


In [8]:
# To display a specific range of rows, slice the DataFrame by position.
df[2:5] # rows 2, 3 and 4; the stop position 5 is not included

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2023,28,2,Snow
3,1/4/2023,24,7,Snow
4,1/5/2023,32,4,Rain


In [9]:
# Show every row. A bare `df` and the full slice `df[:]` display the same
# table; only the last expression of a cell is rendered, so one of them is enough.
df[:]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2023,32,6,Rain
1,1/2/2023,35,7,Sunny
2,1/3/2023,28,2,Snow
3,1/4/2023,24,7,Snow
4,1/5/2023,32,4,Rain
5,1/6/2023,32,2,Sunny


In [11]:
# List the column labels of the DataFrame.
df.columns


Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [16]:
# Read a single column using attribute-style access (the column name used as an attribute).
df.day


0    1/1/2023
1    1/2/2023
2    1/3/2023
3    1/4/2023
4    1/5/2023
5    1/6/2023
Name: day, dtype: object

In [13]:
# Attribute access and bracket access with the column name select the same column.
df.event # or df['event']

0     Rain
1    Sunny
2     Snow
3     Snow
4     Rain
5    Sunny
Name: event, dtype: object

In [14]:
# A single column selected from a DataFrame is a pandas Series.
type(df['event'])

pandas.core.series.Series

In [15]:
# Pass a list of column names to select several columns, in the order given.
df[['event','day']]

Unnamed: 0,event,day
0,Rain,1/1/2023
1,Sunny,1/2/2023
2,Snow,1/3/2023
3,Snow,1/4/2023
4,Rain,1/5/2023
5,Sunny,1/6/2023


# Operations with Your Dataframe


In [17]:
# Largest value in the temperature column.
df['temperature'].max()

35

In [18]:
# Smallest value in the temperature column.
df['temperature'].min()

24

In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.5,4.666667
std,3.885872,2.33809
min,24.0,2.0
25%,29.0,2.5
50%,32.0,5.0
75%,32.0,6.75
max,35.0,7.0


In [21]:
 df[df.temperature==df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2023,35,7,Sunny


In [23]:
# Select rows and columns in a single .loc call instead of chaining two [] lookups:
# the mask picks the hottest day(s), the list picks the columns to show.
df.loc[df['temperature'] == df['temperature'].max(), ['day', 'temperature']]

Unnamed: 0,day,temperature
1,1/2/2023,35


In [24]:
# Inspect the row index; no index column was chosen when loading, so it is the default RangeIndex.
df.index


RangeIndex(start=0, stop=6, step=1)

In [25]:
# set_index returns a new DataFrame indexed by `day`; without reassignment or inplace=True, `df` itself is unchanged.
df.set_index('day')

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2023,32,6,Rain
1/2/2023,35,7,Sunny
1/3/2023,28,2,Snow
1/4/2023,24,7,Snow
1/5/2023,32,4,Rain
1/6/2023,32,2,Sunny


In [29]:
# Make `day` the index and keep the result by rebinding `df`. Reassigning the
# returned frame has the same effect here as `inplace=True` and keeps the call chainable.
# Run this cell once per fresh `df`: a second run raises KeyError because `day`
# is no longer a column.
df = df.set_index('day')

In [30]:
# With `day` as the index, .loc looks a row up by its date label and returns it as a Series.
df.loc['1/4/2023']

temperature      24
windspeed         7
event          Snow
Name: 1/4/2023, dtype: object

In [31]:
# Display `df` again: `day` is still the index, so the set_index change persisted.
df


Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2023,32,6,Rain
1/2/2023,35,7,Sunny
1/3/2023,28,2,Snow
1/4/2023,24,7,Snow
1/5/2023,32,4,Rain
1/6/2023,32,2,Sunny


In [32]:
# Move `day` back into a regular column and restore the default integer index.
# Rebinding `df` to the returned frame has the same effect here as `inplace=True`.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2023,32,6,Rain
1,1/2/2023,35,7,Sunny
2,1/3/2023,28,2,Snow
3,1/4/2023,24,7,Snow
4,1/5/2023,32,4,Rain
5,1/6/2023,32,2,Sunny


In [33]:
# Index the rows by `event` instead, rebinding `df` to the returned frame
# rather than mutating it with `inplace=True`.
df = df.set_index('event')

In [34]:
# Display `df`: `event` is now the index.
df


Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2023,32,6
Sunny,1/2/2023,35,7
Snow,1/3/2023,28,2
Snow,1/4/2023,24,7
Rain,1/5/2023,32,4
Sunny,1/6/2023,32,2


In [35]:
# The `event` index contains repeated labels, so .loc returns every matching row as a DataFrame.
df.loc['Snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/3/2023,28,2
Snow,1/4/2023,24,7


In [36]:
# Turn the `event` index back into a regular column and restore the default integer index.
# Rebinding `df` to the returned frame has the same effect here as `inplace=True`.
df = df.reset_index()
df

Unnamed: 0,event,day,temperature,windspeed
0,Rain,1/1/2023,32,6
1,Sunny,1/2/2023,35,7
2,Snow,1/3/2023,28,2
3,Snow,1/4/2023,24,7
4,Rain,1/5/2023,32,4
5,Sunny,1/6/2023,32,2
