# Series

In [49]:
import pandas as pd
import numpy as np

In [50]:
# Mapping of student name -> grade; used below to build a Series.
grades = {"Atıl": 50, "James": 60, "Lars": 30}

In [51]:
# Build a Series from the dict: keys become the index labels, values become the data.
pd.Series(grades)

Atıl     50
James    60
Lars     30
dtype: int64

In [52]:
# Two Series that share the same labels are added label by label.
contest1 = pd.Series([10, 5, 100], index=list('ABC'))
contest2 = pd.Series([50, 35, 40], index=list('ABC'))
contest3 = contest1.add(contest2)
contest3

A     60
B     40
C    140
dtype: int64

In [53]:
# 'D' exists only in different_series, and 'C' exists only in contest2.
different_series = pd.Series(data=[20,30,40], index=['A','B','D'])
# Addition aligns on labels: a label missing from either operand yields NaN,
# so both 'C' and 'D' come out as NaN and the result is upcast to float64.
new_series = different_series + contest2
new_series # NaN means Not a Number

A    70.0
B    65.0
C     NaN
D     NaN
dtype: float64

# DataFrame

In [54]:
# 4x3 array of samples drawn from the standard normal distribution.
# NOTE(review): no seed is set in the visible cells, so these values change on every fresh run.
data = np.random.randn(4,3)

In [55]:
# Show the generated array.
data

array([[ 0.61153411,  0.96766356,  0.14679563],
       [-1.24993968, -0.66431773,  0.28833649],
       [ 0.16182811, -0.05420079, -1.70803619],
       [ 0.49221681, -1.24593247, -0.02649033]])

In [56]:
# Wrap the array in a DataFrame with explicit row and column labels.
df = pd.DataFrame(
    data,
    index=['Row1', 'Row2', 'Row3', 'Row4'],
    columns=['Column1', 'Column2', 'Column3'],
)

In [57]:
# Render the labelled frame.
df

Unnamed: 0,Column1,Column2,Column3
Row1,0.611534,0.967664,0.146796
Row2,-1.24994,-0.664318,0.288336
Row3,0.161828,-0.054201,-1.708036
Row4,0.492217,-1.245932,-0.02649


In [58]:
df['Column1'] # Selecting one column by name returns it as a Series

Row1    0.611534
Row2   -1.249940
Row3    0.161828
Row4    0.492217
Name: Column1, dtype: float64

In [59]:
df_loc = df.loc['Row1'] # .loc selects a single row by its label
df_iloc = df.iloc[0] # .iloc selects a single row by its integer position

# 'Row1' is the first row, so both lookups print the same Series.
print(df_loc)
print("*" * 50)
print(df_iloc)

Column1    0.611534
Column2    0.967664
Column3    0.146796
Name: Row1, dtype: float64
**************************************************
Column1    0.611534
Column2    0.967664
Column3    0.146796
Name: Row1, dtype: float64


In [60]:
df[['Column1', 'Column3']] # A list of column names selects several columns as a DataFrame

Unnamed: 0,Column1,Column3
Row1,0.611534,0.146796
Row2,-1.24994,0.288336
Row3,0.161828,-1.708036
Row4,0.492217,-0.02649


In [61]:
# Select the first two rows by integer position (the slice end is exclusive).
# .iloc states the positional intent explicitly; a bare df[...] slice is easy
# to misread as label-based or column selection.
df.iloc[0:2]

Unnamed: 0,Column1,Column2,Column3
Row1,0.611534,0.967664,0.146796
Row2,-1.24994,-0.664318,0.288336


In [62]:
df.iloc[:,1] # All rows, column at integer position 1 (Column2)

Row1    0.967664
Row2   -0.664318
Row3   -0.054201
Row4   -1.245932
Name: Column2, dtype: float64

In [63]:
df['Extra'] = 10 # Adding a new column; the scalar is broadcast to every row
df

Unnamed: 0,Column1,Column2,Column3,Extra
Row1,0.611534,0.967664,0.146796,10
Row2,-1.24994,-0.664318,0.288336,10
Row3,0.161828,-0.054201,-1.708036,10
Row4,0.492217,-1.245932,-0.02649,10


In [64]:
# Drop the 'Extra' column (axis=1 targets columns).
# inplace=True modifies df itself and returns None instead of returning a new DataFrame.
df.drop("Extra", axis=1, inplace=True)

In [65]:
# Confirm that the 'Extra' column is gone.
df

Unnamed: 0,Column1,Column2,Column3
Row1,0.611534,0.967664,0.146796
Row2,-1.24994,-0.664318,0.288336
Row3,0.161828,-0.054201,-1.708036
Row4,0.492217,-1.245932,-0.02649


In [66]:
# add a new row
# Add a new row: assigning through .loc with a label that does not exist yet appends it.
df.loc['Row5'] = [1,2,3]
df

Unnamed: 0,Column1,Column2,Column3
Row1,0.611534,0.967664,0.146796
Row2,-1.24994,-0.664318,0.288336
Row3,0.161828,-0.054201,-1.708036
Row4,0.492217,-1.245932,-0.02649
Row5,1.0,2.0,3.0


In [69]:
df.loc['Row3', 'Column1'] = 777 # Changing a specific value
# Equivalent single-cell assignments:
# df.at['Row3', 'Column1'] = 777 # label-based scalar accessor
# df.iat[2,0] = 777 # position-based scalar accessor
# df.iloc[2,0] = 777 # position-based indexer
# NOT equivalent -- avoid: chained indexing assigns into an intermediate object,
# so the change is not guaranteed to reach df (and never does under Copy-on-Write).
# df.loc['Row3']['Column1'] = 777

In [68]:
# Display the frame with the updated Row3 / Column1 value.
df

Unnamed: 0,Column1,Column2,Column3
Row1,0.611534,0.967664,0.146796
Row2,-1.24994,-0.664318,0.288336
Row3,777.0,-0.054201,-1.708036
Row4,0.492217,-1.245932,-0.02649
Row5,1.0,2.0,3.0


In [72]:
# reset_index() returns a new frame in which the row labels become an 'index' column
# and the rows are renumbered from 0.
reset_df = df.reset_index()
reset_df

Unnamed: 0,index,Column1,Column2,Column3
0,Row1,0.611534,0.967664,0.146796
1,Row2,-1.24994,-0.664318,0.288336
2,Row3,777.0,-0.054201,-1.708036
3,Row4,0.492217,-1.245932,-0.02649
4,Row5,1.0,2.0,3.0


In [74]:
# Attach the custom labels as a column, then promote that column to the index.
new_indices = list('abcde')
reset_df['NewIndex'] = new_indices
reset_df.set_index(keys='NewIndex', inplace=True)
reset_df

Unnamed: 0_level_0,index,Column1,Column2,Column3
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a,Row1,0.611534,0.967664,0.146796
b,Row2,-1.24994,-0.664318,0.288336
c,Row3,777.0,-0.054201,-1.708036
d,Row4,0.492217,-1.245932,-0.02649
e,Row5,1.0,2.0,3.0


# MultiIndex

In [75]:
# Outer level (show) and inner level (character), paired position by position.
first_index = ["Simpson"] * 3 + ["South Park"] * 3
inner_index = ["Homer", "Bart", "Marge", "Cartman", "Kenny", "Kyle"]
zipped_index = [(outer, inner) for outer, inner in zip(first_index, inner_index)]
zipped_index

[('Simpson', 'Homer'),
 ('Simpson', 'Bart'),
 ('Simpson', 'Marge'),
 ('South Park', 'Cartman'),
 ('South Park', 'Kenny'),
 ('South Park', 'Kyle')]

In [77]:
# Create a multi index from the zipped index.
# Note: zipped_index is rebound here from a list of tuples to a pd.MultiIndex.
zipped_index = pd.MultiIndex.from_tuples(zipped_index)
zipped_index

MultiIndex([(   'Simpson',   'Homer'),
            (   'Simpson',    'Bart'),
            (   'Simpson',   'Marge'),
            ('South Park', 'Cartman'),
            ('South Park',   'Kenny'),
            ('South Park',    'Kyle')],
           )

In [78]:
# Placeholder data: a 6x2 array filled with 1.0.
sample_values = np.ones(shape=(6, 2))
sample_values

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [79]:
# One row per entry of the MultiIndex, with two value columns.
big_df = pd.DataFrame(
    data=sample_values,
    index=zipped_index,
    columns=["Age", "Salary"],
)
big_df

Unnamed: 0,Unnamed: 1,Age,Salary
Simpson,Homer,1.0,1.0
Simpson,Bart,1.0,1.0
Simpson,Marge,1.0,1.0
South Park,Cartman,1.0,1.0
South Park,Kenny,1.0,1.0
South Park,Kyle,1.0,1.0


In [80]:
big_df["Age"] # One column of a multi-indexed frame: a Series that keeps both index levels

Simpson     Homer      1.0
            Bart       1.0
            Marge      1.0
South Park  Cartman    1.0
            Kenny      1.0
            Kyle       1.0
Name: Age, dtype: float64

In [82]:
big_df["Age"]["Simpson"] # Selecting an outer-level label returns the entries under it, indexed by the inner level
# big_df.loc["Simpson"] # Selects the same rows, but keeps every column (Age and Salary), not just Age

Homer    1.0
Bart     1.0
Marge    1.0
Name: Age, dtype: float64

In [83]:
big_df["Age"]["Simpson"]["Homer"] # Accessing a specific value in the multi index
# big_df.loc[("Simpson", "Homer"), "Age"] # The same value in a single lookup
# big_df.loc["Simpson"].loc["Homer"] # Returns the whole row (Age and Salary), not a single value

1.0