In [1]:
import pandas as pd
import numpy as np

# DataFrames

In [2]:
# Column labels, listed in the same order as the values inside each row below.
columns = [
    'Make',
    'Model',
    'Year',
    'Engine HP',
    'Engine Cylinders',
    'Transmission Type',
    'Vehicle_Style',
    'MSRP',
]

# One inner list per car. The Sonata's horsepower is deliberately None so the
# frame contains a missing value to work with later on.
data = [
    ['Nissan', 'Stanza', 1991, 138, 4, 'MANUAL', 'sedan', 2000],
    ['Hyundai', 'Sonata', 2017, None, 4, 'AUTOMATIC', 'Sedan', 27150],
    ['Lotus', 'Elise', 2010, 218, 4, 'MANUAL', 'convertible', 54990],
    ['GMC', 'Acadia', 2017, 194, 4, 'AUTOMATIC', '4dr SUV', 34450],
    ['Nissan', 'Frontier', 2017, 261, 6, 'MANUAL', 'Pickup', 32340],
]

In [7]:
# Build the frame from the row lists, labelling the columns explicitly.
df = pd.DataFrame(data=data, columns=columns)

In [8]:
# Display the whole frame (only five rows here, so showing everything is fine).
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [10]:
# Show just the first two rows.
df.head(2)

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150


# Series

In [11]:
# Selecting a single column gives a Series; bracket access works for any column name.
df['Make']

0     Nissan
1    Hyundai
2      Lotus
3        GMC
4     Nissan
Name: Make, dtype: object

In [12]:
# Remove the 'Year' column by rebinding df to a new frame instead of using
# `del df['Year']`: with errors='ignore' this cell can be re-run without
# raising KeyError once the column is already gone.
df = df.drop(columns=['Year'], errors='ignore')

In [13]:
# Column labels of the frame; 'Year' is no longer among them.
df.columns

Index(['Make', 'Model', 'Engine HP', 'Engine Cylinders', 'Transmission Type',
       'Vehicle_Style', 'MSRP'],
      dtype='object')

# Index

In [15]:
# Row labels. No index was given when the frame was built, so it is a default RangeIndex.
df.index

RangeIndex(start=0, stop=5, step=1)

In [16]:
# A column taken from the frame carries the frame's row labels as its own index.
df['Make']

0     Nissan
1    Hyundai
2      Lotus
3        GMC
4     Nissan
Name: Make, dtype: object

In [18]:
# .loc selects by index label: this returns the row labelled 1 as a Series.
df.loc[1]

Make                   Hyundai
Model                   Sonata
Engine HP                  NaN
Engine Cylinders             4
Transmission Type    AUTOMATIC
Vehicle_Style            Sedan
MSRP                     27150
Name: 1, dtype: object

In [19]:
# Passing a list of labels to .loc returns a DataFrame containing those rows.
df.loc[[1, 2]]

Unnamed: 0,Make,Model,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
1,Hyundai,Sonata,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,218.0,4,MANUAL,convertible,54990


In [20]:
# Replace the default integer labels with the letters 'a' through 'e'.
df.index = list('abcde')

In [21]:
# Same rows as before, now labelled 'a'..'e' instead of 0..4.
df

Unnamed: 0,Make,Model,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
a,Nissan,Stanza,138.0,4,MANUAL,sedan,2000
b,Hyundai,Sonata,,4,AUTOMATIC,Sedan,27150
c,Lotus,Elise,218.0,4,MANUAL,convertible,54990
d,GMC,Acadia,194.0,4,AUTOMATIC,4dr SUV,34450
e,Nissan,Frontier,261.0,6,MANUAL,Pickup,32340


In [22]:
# .iloc selects by integer position, so it still works now that the labels are letters.
df.iloc[[1, 2, 4]]

Unnamed: 0,Make,Model,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
b,Hyundai,Sonata,,4,AUTOMATIC,Sedan,27150
c,Lotus,Elise,218.0,4,MANUAL,convertible,54990
e,Nissan,Frontier,261.0,6,MANUAL,Pickup,32340


In [23]:
# Returns a new frame with a fresh 0..n-1 index; the old labels are kept in an 'index' column.
# The result is only displayed, not assigned, so df keeps its letter labels.
df.reset_index()

Unnamed: 0,index,Make,Model,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,a,Nissan,Stanza,138.0,4,MANUAL,sedan,2000
1,b,Hyundai,Sonata,,4,AUTOMATIC,Sedan,27150
2,c,Lotus,Elise,218.0,4,MANUAL,convertible,54990
3,d,GMC,Acadia,194.0,4,AUTOMATIC,4dr SUV,34450
4,e,Nissan,Frontier,261.0,6,MANUAL,Pickup,32340


In [24]:
# drop=True discards the old labels instead of keeping them as a column.
# The result is only displayed; df is not reassigned.
df.reset_index(drop=True)

Unnamed: 0,Make,Model,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,261.0,6,MANUAL,Pickup,32340


# Element-wise operations

In [26]:
# Arithmetic on a Series is applied to every element; the missing value stays NaN.
df['Engine HP'] / 100

a    1.38
b     NaN
c    2.18
d    1.94
e    2.61
Name: Engine HP, dtype: float64

# Filtering

# String operations

In [29]:
# Raw labels: note the inconsistent capitalisation ('sedan' vs 'Sedan') and the space in '4dr SUV'.
df['Vehicle_Style']

a          sedan
b          Sedan
c    convertible
d        4dr SUV
e         Pickup
Name: Vehicle_Style, dtype: object

In [30]:
# Plain Python string method on a single value, for comparison with the Series .str accessor.
'STR'.lower()

'str'

In [31]:
# The .str accessor applies the string method to every element of the Series.
df['Vehicle_Style'].str.lower()

a          sedan
b          sedan
c    convertible
d        4dr suv
e         pickup
Name: Vehicle_Style, dtype: object

In [35]:
# Normalise the style labels: spaces become underscores, then everything is lowercased.
(
    df['Vehicle_Style']
    .str.replace(' ', '_')
    .str.lower()
)

a          sedan
b          sedan
c    convertible
d        4dr_suv
e         pickup
Name: Vehicle_Style, dtype: object

# Summarizing operations

In [38]:
# Lowest price in the frame.
df['MSRP'].min()

np.int64(2000)

In [39]:
# Highest price in the frame.
df['MSRP'].max()

np.int64(54990)

In [40]:
# Count, mean, standard deviation, min/max and quartiles of the price column in one call.
df['MSRP'].describe()

count        5.000000
mean     30186.000000
std      18985.044904
min       2000.000000
25%      27150.000000
50%      32340.000000
75%      34450.000000
max      54990.000000
Name: MSRP, dtype: float64

In [41]:
# Summary statistics for every numeric column, rounded to two decimals for readability.
# 'Engine HP' has a count of 4 because its missing value is skipped.
df.describe().round(2)

Unnamed: 0,Engine HP,Engine Cylinders,MSRP
count,4.0,5.0,5.0
mean,202.75,4.4,30186.0
std,51.3,0.89,18985.04
min,138.0,4.0,2000.0
25%,180.0,4.0,27150.0
50%,206.0,4.0,32340.0
75%,228.75,4.0,34450.0
max,261.0,6.0,54990.0


In [43]:
# Number of distinct manufacturers ('Nissan' appears twice, so 4 rather than 5).
df['Make'].nunique()

4

In [45]:
# Count missing values per column (isna is the same method as isnull).
df.isna().sum()

Make                 0
Model                0
Engine HP            1
Engine Cylinders     0
Transmission Type    0
Vehicle_Style        0
MSRP                 0
dtype: int64

# Grouping

# Getting the NumPy arrays

In [48]:
# Get the column as a NumPy array. .to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute; for this integer
# column both return the same ndarray.
df['MSRP'].to_numpy()

array([ 2000, 27150, 54990, 34450, 32340])

In [50]:
# Convert the frame to a list of dicts, one per row, keyed by column name.
# The missing 'Engine HP' value comes through as nan.
df.to_dict(orient='records')

[{'Make': 'Nissan',
  'Model': 'Stanza',
  'Engine HP': 138.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'sedan',
  'MSRP': 2000},
 {'Make': 'Hyundai',
  'Model': 'Sonata',
  'Engine HP': nan,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': 'Sedan',
  'MSRP': 27150},
 {'Make': 'Lotus',
  'Model': 'Elise',
  'Engine HP': 218.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'convertible',
  'MSRP': 54990},
 {'Make': 'GMC',
  'Model': 'Acadia',
  'Engine HP': 194.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': '4dr SUV',
  'MSRP': 34450},
 {'Make': 'Nissan',
  'Model': 'Frontier',
  'Engine HP': 261.0,
  'Engine Cylinders': 6,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'Pickup',
  'MSRP': 32340}]