__1. Pandas: Series creation and indexing__

In [2]:
import pandas as pd

In [3]:
# Every second integer from 80 up to (not including) 100, stored under the
# default 0..9 integer index.
grade_values = range(80, 100, 2)
grades = pd.Series(grade_values)
print(grades)

0    80
1    82
2    84
3    86
4    88
5    90
6    92
7    94
8    96
9    98
dtype: int64


In [4]:
# Summary statistics of the Series, followed by its element count.
grades_summary = grades.describe()
print(grades_summary)
print(len(grades))

count    10.000000
mean     89.000000
std       6.055301
min      80.000000
25%      84.500000
50%      89.000000
75%      93.500000
max      98.000000
dtype: float64
10


In [7]:
# Create a Series with custom (string) index labels.
height = pd.Series([175, 184, 170], index=['Kim', 'Kwon', 'Lee'])
print(height)
# Three equivalent ways to read Kwon's entry: attribute access, label lookup,
# and positional lookup. The position goes through `.iloc` because using a bare
# integer key on a non-integer index (`height[1]`) is deprecated in pandas and
# emits a FutureWarning.
print(height.Kwon, height['Kwon'], height.iloc[1])

Kim     175
Kwon    184
Lee     170
dtype: int64
184 184 184


In [8]:
# Build a Series from a dict: the keys become the index labels. The values mix
# ints with a string, so the resulting dtype is `object`.
nation_codes = {'Korea': 82, 'Japan': 81, 'China': 'cn'}
nations = pd.Series(nation_codes)
print(nations)

Korea    82
Japan    81
China    cn
dtype: object


__2. Pandas: DataFrame creation and indexing__

In [9]:
# Each dict key becomes a column; the three values per key become rows 0..2.
temps_by_season = {
    'Spring': [10, 14, 18],
    'Summer': [24, 27, 30],
    'Fall': [24, 21, 18],
    'Winter': [8, 0, -5],
}
season_temps = pd.DataFrame(temps_by_season)
print(season_temps)

   Spring  Summer  Fall  Winter
0      10      24    24       8
1      14      27    21       0
2      18      30    18      -5


In [10]:
# Three scores per student; each student becomes a column of the DataFrame.
scores = {
    'Kim': [87, 96, 70],
    'Park': [100, 87, 90],
    'Kwon': [100, 90, 95],
    'Lee': [83, 65, 85],
}
scores_df = pd.DataFrame(data=scores)
print(scores_df)

   Kim  Park  Kwon  Lee
0   87   100   100   83
1   96    87    90   65
2   70    90    95   85


In [11]:
# Two ways to get subject-labelled rows:
#   1. pass the labels via `index=` when constructing a new frame, or
#   2. assign the labels to the `.index` of an already-built frame.
scores_ni = pd.DataFrame(
    scores,
    index=['Math', 'Econ', 'Physics'],
)
print(scores_ni)

scores_df.index = ['Math', 'Econ', 'Physics']
print(scores_df)

         Kim  Park  Kwon  Lee
Math      87   100   100   83
Econ      96    87    90   65
Physics   70    90    95   85
         Kim  Park  Kwon  Lee
Math      87   100   100   83
Econ      96    87    90   65
Physics   70    90    95   85


__3. Pandas: DataFrame slicing__

In [12]:
# `.loc[0]` selects by label and `.iloc[0]` by position; with the default
# integer index both refer to the same first row, printed one after the other.
row_by_label = season_temps.loc[0]
row_by_position = season_temps.iloc[0]
print(row_by_label, row_by_position, sep='\n')

Spring    10
Summer    24
Fall      24
Winter     8
Name: 0, dtype: int64
Spring    10
Summer    24
Fall      24
Winter     8
Name: 0, dtype: int64


In [14]:
# Label slice on rows (both endpoints included) versus a positional slice of
# the first two rows and first three columns.
rows_by_label = scores_ni.loc["Math":"Econ"]
block_by_position = scores_df.iloc[:2, :3]
print(rows_by_label, block_by_position, sep='\n')

      Kim  Park  Kwon  Lee
Math   87   100   100   83
Econ   96    87    90   65
      Kim  Park  Kwon
Math   87   100   100
Econ   96    87    90


In [16]:
# Pick non-adjacent rows with a list: by label via `.loc`, and by position via
# `.iloc` (here also restricted to the first three columns).
picked_by_label = scores_df.loc[["Math", "Physics"]]
picked_by_position = scores_df.iloc[[0, 2], :3]
print(picked_by_label, picked_by_position, sep='\n')

         Kim  Park  Kwon  Lee
Math      87   100   100   83
Physics   70    90    95   85
         Kim  Park  Kwon
Math      87   100   100
Physics   70    90    95


In [30]:
# Combine a row selector with a column selector: a label slice of rows plus a
# list of column labels, then a list of row positions plus a positional column
# slice.
kim_kwon_all_subjects = scores_ni.loc["Math":"Physics", ["Kim", "Kwon"]]
first_two_students = scores_df.iloc[[0, 2], 0:2]
print(kim_kwon_all_subjects, first_two_students, sep='\n')

         Kim  Kwon
Math      87   100
Econ      96    90
Physics   70    95
         Kim  Park
Math      87   100
Physics   70    90


In [23]:
# Label lists on both axes: two chosen subjects for two chosen students.
chosen_subjects = ["Math", "Physics"]
chosen_students = ["Kim", "Kwon"]
print(scores_ni.loc[chosen_subjects, chosen_students])

         Kim  Kwon
Math      87   100
Physics   70    95


In [24]:
# Print the whole subject-indexed score table in plain-text form.
print(scores_df)

         Kim  Park  Kwon  Lee
Math      87   100   100   83
Econ      96    87    90   65
Physics   70    90    95   85


In [26]:
# Scalar lookups: `.at` addresses one cell by row/column label, `.iat` by
# integer row/column position.
cell_by_label = scores_df.at['Econ', 'Kwon']
cell_by_position = scores_df.iat[1, 3]
print(cell_by_label, cell_by_position)

90 65


__4. Pandas: Boolean indexing__

In [32]:
# Keep scores of 90 or more; every other cell is replaced by NaN.
scores_df.where(scores_df >= 90)

Unnamed: 0,Kim,Park,Kwon,Lee
Math,,100.0,100,
Econ,96.0,,90,
Physics,,90.0,95,


In [33]:
# Keep scores strictly between 70 and 90; every other cell is replaced by NaN.
above_70 = scores_df > 70
below_90 = scores_df < 90
scores_df.where(below_90 & above_70)

Unnamed: 0,Kim,Park,Kwon,Lee
Math,87.0,,,83.0
Econ,,87.0,,
Physics,,,,85.0


__5. Pandas: Descriptive statistics__

In [34]:
# Show floats with 3 decimal places. The fully-qualified key is required: the
# bare pattern 'precision' also matches `styler.format.precision` on
# pandas >= 1.4 and makes `set_option` raise OptionError.
pd.set_option('display.precision', 3)
# Per-student (column-wise) summary statistics.
score_summary = scores_df.describe()
print(score_summary)

          Kim     Park   Kwon     Lee
count   3.000    3.000    3.0   3.000
mean   84.333   92.333   95.0  77.667
std    13.204    6.807    5.0  11.015
min    70.000   87.000   90.0  65.000
25%    78.500   88.500   92.5  74.000
50%    87.000   90.000   95.0  83.000
75%    91.500   95.000   97.5  84.000
max    96.000  100.000  100.0  85.000


In [35]:
# Average score per student: reduce down the rows (axis 0), one value per column.
scores_df.mean(axis=0)

Kim     84.333
Park    92.333
Kwon    95.000
Lee     77.667
dtype: float64

__6. Pandas: Transposing__

In [36]:
# Swap rows and columns: students become the rows, subjects the columns.
scores_df.transpose()

Unnamed: 0,Math,Econ,Physics
Kim,87,96,70
Park,100,87,90
Kwon,100,90,95
Lee,83,65,85


In [37]:
# Transposing first makes `describe` summarise each subject across students.
scores_by_student = scores_df.transpose()
scores_by_student.describe()

Unnamed: 0,Math,Econ,Physics
count,4.0,4.0,4.0
mean,92.5,84.5,85.0
std,8.813,13.528,10.801
min,83.0,65.0,70.0
25%,86.0,81.5,81.25
50%,93.5,88.5,87.5
75%,100.0,91.5,91.25
max,100.0,96.0,95.0


__7. Pandas: Sorting by index and values__

In [38]:
# Order rows by their index labels, largest label first.
season_temps.sort_index(axis=0, ascending=False)

Unnamed: 0,Spring,Summer,Fall,Winter
2,18,30,18,-5
1,14,27,21,0
0,10,24,24,8


In [39]:
# Order rows alphabetically by subject label (ascending row-index sort).
scores_df.sort_index(axis=0, ascending=True)

Unnamed: 0,Kim,Park,Kwon,Lee
Econ,96,87,90,65
Math,87,100,100,83
Physics,70,90,95,85


In [40]:
# Order columns alphabetically by student name.
scores_df.sort_index(axis='columns')

Unnamed: 0,Kim,Kwon,Lee,Park
Math,87,100,83,100
Econ,96,90,65,87
Physics,70,95,85,90


In [41]:
# Reorder the student columns by their value in the 'Econ' row, highest first.
scores_df.sort_values(by='Econ', axis='columns', ascending=False)

Unnamed: 0,Kim,Kwon,Park,Lee
Math,87,100,100,83
Econ,96,90,87,65
Physics,70,95,90,85


In [42]:
# Same ordering seen from the transposed side: after transposing, students are
# rows, so sort those rows by the 'Econ' column, highest first.
scores_by_student = scores_df.transpose()
scores_by_student.sort_values(by='Econ', ascending=False)

Unnamed: 0,Math,Econ,Physics
Kim,87,96,70
Kwon,100,90,95
Park,100,87,90
Lee,83,65,85


__8. Pandas: One-hot encoding__

In [46]:
# Small example table: Year runs 1990..2004, Rank runs 0..14, and Maker is a
# 15-entry list drawn from five repeating brand strings.
auto_firms = [
    'Hundai', 'Honda', 'Kia', 'Audi', 'Benz',
    'Hundai', 'Benz', 'Audi', 'Hundai', 'Kia',
    'Honda', 'Kia', 'Audi', 'Hundai', 'Benz',
]
Year = list(range(1990, 2005))
Rank = list(range(15))
auto_df = pd.DataFrame({
    'Year': Year,
    'Rank': Rank,
    'Maker': auto_firms,
})
print(auto_df)

    Year  Rank   Maker
0   1990     0  Hundai
1   1991     1   Honda
2   1992     2     Kia
3   1993     3    Audi
4   1994     4    Benz
5   1995     5  Hundai
6   1996     6    Benz
7   1997     7    Audi
8   1998     8  Hundai
9   1999     9     Kia
10  2000    10   Honda
11  2001    11     Kia
12  2002    12    Audi
13  2003    13  Hundai
14  2004    14    Benz


In [47]:
# One-hot encode the Maker column: one indicator column per distinct maker.
# `dtype=int` keeps the indicators as 0/1; without it pandas >= 2.0 returns
# boolean True/False columns instead.
am_onehot = pd.get_dummies(auto_df['Maker'], dtype=int)
print(am_onehot)

    Audi  Benz  Honda  Hundai  Kia
0      0     0      0       1    0
1      0     0      1       0    0
2      0     0      0       0    1
3      1     0      0       0    0
4      0     1      0       0    0
5      0     0      0       1    0
6      0     1      0       0    0
7      1     0      0       0    0
8      0     0      0       1    0
9      0     0      0       0    1
10     0     0      1       0    0
11     0     0      0       0    1
12     1     0      0       0    0
13     0     0      0       1    0
14     0     1      0       0    0
