## Imports

In [2]:
import pandas as pd
import numpy as np
import requests

## Reading file

In [66]:
# Load the raw records from the JSON file into a DataFrame
# (orient='records' expects a list of row objects).
auto_json_path = '../data/auto.json'
df = pd.read_json(auto_json_path, orient='records')
df

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
720,Y163O8161RUS,2,1600.00,Ford,Focus
721,M0309X197RUS,1,22300.00,Ford,Focus
722,O673E8197RUS,2,600.00,Ford,Focus
723,8610T8154RUS,1,2000.00,Ford,Focus


Change the display format of floating-point numbers

In [67]:
# Render every float with exactly two decimals in DataFrame/Series output.
# This only affects display; the stored values are unchanged.
pd.set_option('display.float_format', '{:.2f}'.format)
df

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
720,Y163O8161RUS,2,1600.00,Ford,Focus
721,M0309X197RUS,1,22300.00,Ford,Focus
722,O673E8197RUS,2,600.00,Ford,Focus
723,8610T8154RUS,1,2000.00,Ford,Focus


## Enriching dataframe

Create a sample with 200 new observations with random_state = 21

In [68]:
# Take 200 rows of df; random_state=21 makes the row selection repeatable.
sample = df.sample(200, random_state=21)

# Overwrite Refund and Fines in the sampled rows with values picked from
# random rows of df (each column drawn independently), so the sampled rows
# become "new" observations.
# A dedicated seeded generator keeps the draws reproducible without touching
# the global np.random state, and the lower bound 0 lets row 0 of df be
# picked as well (the upper bound is exclusive).
rng = np.random.default_rng(21)
n_source_rows = df.shape[0]
refund_positions = rng.integers(0, n_source_rows, size=sample.shape[0])
fines_positions = rng.integers(0, n_source_rows, size=sample.shape[0])

# Positional lookup on the underlying arrays replaces the per-element
# apply(lambda x: ...iloc[x]) calls.
sample['Refund'] = df['Refund'].to_numpy()[refund_positions]
sample['Fines'] = df['Fines'].to_numpy()[fines_positions]
sample

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
445,M0299X197RUS,1,5000.00,Ford,Focus
22,83298C154RUS,2,1500.00,Ford,Focus
93,H957HY161RUS,2,1900.00,Ford,Focus
173,T941CC96RUS,2,4000.00,Ford,Focus
697,H966HY161RUS,1,4500.00,Ford,Focus
...,...,...,...,...,...
14,8182XX154RUS,1,500.00,Ford,Focus
623,X796TH96RUS,2,4800.00,Ford,Focus
498,T011MY163RUS,2,8594.59,Ford,Focus
536,T341CC96RUS,1,3500.00,Volkswagen,Passat


Concatenate the sample with the initial dataframe to a new dataframe concat_rows

In [69]:
# Stack the 200 generated rows under the original frame. The original index
# labels are kept, so labels coming from sample repeat labels of df.
concat_rows = pd.concat([df, sample], axis='index')
concat_rows

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
14,8182XX154RUS,1,500.00,Ford,Focus
623,X796TH96RUS,2,4800.00,Ford,Focus
498,T011MY163RUS,2,8594.59,Ford,Focus
536,T341CC96RUS,1,3500.00,Volkswagen,Passat


## Enrich the dataframe concat_rows by a new column with the data generated

Use np.random.seed(21) before generating the years

In [32]:
# Seed NumPy's global random state so later np.random.* calls are repeatable.
np.random.seed(seed=21)

Create a series with the name Year using random integers from 1980 to 2019

In [70]:
# One random year per row of concat_rows, drawn from 1980..2019
# (randint's upper bound 2020 is exclusive). The series reuses the index of
# concat_rows so it can later be attached as a column.
n_rows = len(concat_rows)
random_years = np.random.randint(low=1980, high=2020, size=n_rows)
years = pd.Series(random_years, index=concat_rows.index, name='Year')
years

0      2015
1      1987
2      1991
3      2017
4      1991
       ... 
14     1994
623    1983
498    2000
536    1999
520    1995
Name: Year, Length: 925, dtype: int64

Concatenate the series with the dataframe and name it fines

In [71]:
# Attach the Year series as a new rightmost column of concat_rows.
# years was created with concat_rows.index, so both objects carry the same index.
fines = pd.concat([concat_rows, years], axis=1)
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.00,Ford,Focus,2015
1,E432XX77RUS,1,6500.00,Toyota,Camry,1987
2,7184TT36RUS,1,2100.00,Ford,Focus,1991
3,X582HE161RUS,2,2000.00,Ford,Focus,2017
4,92918M178RUS,1,5700.00,Ford,Focus,1991
...,...,...,...,...,...,...
14,8182XX154RUS,1,500.00,Ford,Focus,1994
623,X796TH96RUS,2,4800.00,Ford,Focus,1983
498,T011MY163RUS,2,8594.59,Ford,Focus,2000
536,T341CC96RUS,1,3500.00,Volkswagen,Passat,1999


## Enrich the dataframe with the data from another dataframe

Create a new dataframe with the car numbers and their owners

In [72]:
# Read the surname file as a plain array of rows (orient='values').
surnames = pd.read_json('../data/surname.json', orient='values')

# Row 0 holds the header names: use it as the column labels, then remove
# that row from the data.
surnames.columns = surnames.iloc[0]
surnames = surnames.drop(index=0)
surnames

Unnamed: 0,NAME,COUNT,RANK
1,ADAMS,427865,42
2,ALLEN,482607,33
3,ALVAREZ,233983,92
4,ANDERSON,784404,15
5,BAILEY,277845,72
...,...,...,...
96,WILLIAMS,1625252,3
97,WILSON,801882,14
98,WOOD,250715,84
99,WRIGHT,458980,35


In [73]:
# Draw one surname per distinct car number. Sampling is done with
# replacement, so the same surname may be drawn more than once.
n_unique_cars = len(fines['CarNumber'].unique())
new_series = surnames.sample(n=n_unique_cars, replace=True, random_state=21)['NAME']
new_series

74    RICHARDSON
80          ROSS
57        MORGAN
5         BAILEY
49         LOPEZ
         ...    
10      CAMPBELL
32          HALL
6          BAKER
21          DIAZ
57        MORGAN
Name: NAME, Length: 531, dtype: object

In [74]:
# Pair each distinct car number (in order of first appearance) with the
# surname at the same position in new_series.
car_numbers = fines['CarNumber'].unique()
owner_pairs = list(zip(car_numbers, new_series))
owners = pd.DataFrame(owner_pairs, columns=['CarNumber', 'SURNAME'])
owners

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,RICHARDSON
1,E432XX77RUS,ROSS
2,7184TT36RUS,MORGAN
3,X582HE161RUS,BAILEY
4,92918M178RUS,LOPEZ
...,...,...
526,O136HO197RUS,CAMPBELL
527,O22097197RUS,HALL
528,M0309X197RUS,BAKER
529,O673E8197RUS,DIAZ


## Append 5 new observations of your own to fines df

In [75]:
# Five hand-written observations, one list per row. The values are matched
# positionally to the columns of fines.
manual_fines = [
    ['A123HKRUS', 1, 5600.00, 'Ford', 'Focus', 2019],
    ['E456TYRUS', 2, 100.00, 'Toyota', 'Corolla', 1984],
    ['X789CBRUS', 1, 560.00, 'Skoda', 'Octavia', 2012],
    ['O987PMRUS', 2, 1200.00, 'Ford', 'Focus', 1995],
    ['P654MBRUS', 3, 550.00, 'Volkswagen', 'Passat', 1999],
]

new_rows = pd.DataFrame(data=manual_fines, columns=fines.columns)

new_rows

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,A123HKRUS,1,5600.0,Ford,Focus,2019
1,E456TYRUS,2,100.0,Toyota,Corolla,1984
2,X789CBRUS,1,560.0,Skoda,Octavia,2012
3,O987PMRUS,2,1200.0,Ford,Focus,1995
4,P654MBRUS,3,550.0,Volkswagen,Passat,1999


In [76]:
# Append the hand-written rows and renumber the index from 0.
# NOTE: fines is reassigned from itself, so re-running this cell appends the
# same rows again.
fines = pd.concat([fines, new_rows], axis=0, ignore_index=True)


In [77]:
# Display fines after the hand-written rows were appended.
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.00,Ford,Focus,2015
1,E432XX77RUS,1,6500.00,Toyota,Camry,1987
2,7184TT36RUS,1,2100.00,Ford,Focus,1991
3,X582HE161RUS,2,2000.00,Ford,Focus,2017
4,92918M178RUS,1,5700.00,Ford,Focus,1991
...,...,...,...,...,...,...
925,A123HKRUS,1,5600.00,Ford,Focus,2019
926,E456TYRUS,2,100.00,Toyota,Corolla,1984
927,X789CBRUS,1,560.00,Skoda,Octavia,2012
928,O987PMRUS,2,1200.00,Ford,Focus,1995


## Delete 20 last owners and add 3 new ones

In [78]:
# Remove the last 20 rows of owners.
# NOTE: owners is rebuilt from itself, so re-running this cell removes
# another 20 rows.
owners = owners.drop(index=owners.index[-20:])

In [79]:
# Display owners after the last 20 rows were removed.
owners

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,RICHARDSON
1,E432XX77RUS,ROSS
2,7184TT36RUS,MORGAN
3,X582HE161RUS,BAILEY
4,92918M178RUS,LOPEZ
...,...,...
506,T914CT197RUS,HERNANDEZ
507,E41977152RUS,BAKER
508,9464EX178RUS,MARTIN
509,O50197197RUS,WRIGHT


In [81]:
# Three hand-written owners, one [car number, surname] pair per row.
manual_owners = [
    ['K123HARUS', 'SAMBER'],
    ['Y456TERUS', 'COLDWELL'],
    ['B789CXRUS', 'SPRINFER'],
]

new_rows2 = pd.DataFrame(data=manual_owners, columns=owners.columns)

new_rows2

Unnamed: 0,CarNumber,SURNAME
0,K123HARUS,SAMBER
1,Y456TERUS,COLDWELL
2,B789CXRUS,SPRINFER


In [82]:
# Append the hand-written owners and renumber the index from 0.
# NOTE: owners is reassigned from itself, so re-running this cell appends
# the same rows again.
owners = pd.concat([owners, new_rows2], axis=0, ignore_index=True)
owners

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,RICHARDSON
1,E432XX77RUS,ROSS
2,7184TT36RUS,MORGAN
3,X582HE161RUS,BAILEY
4,92918M178RUS,LOPEZ
...,...,...
509,O50197197RUS,WRIGHT
510,7608EE777RUS,HILL
511,K123HARUS,SAMBER
512,Y456TERUS,COLDWELL


## Different joins

The new dataframe should have only the car numbers that exist in both
dataframes

In [83]:
# Inner join on CarNumber: keep only car numbers present in both frames.
fines.merge(owners, on='CarNumber', how='inner')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,2015,RICHARDSON
1,Y163O8161RUS,2,1600.00,Ford,Focus,2004,RICHARDSON
2,E432XX77RUS,1,6500.00,Toyota,Camry,1987,ROSS
3,E432XX77RUS,2,13000.00,Toyota,Camry,1986,ROSS
4,7184TT36RUS,1,2100.00,Ford,Focus,1991,MORGAN
...,...,...,...,...,...,...,...
894,E41977152RUS,2,2400.00,Ford,Focus,2017,BAKER
895,9464EX178RUS,2,2100.00,Ford,Focus,1987,MARTIN
896,O50197197RUS,2,7800.00,Ford,Focus,1983,WRIGHT
897,7608EE777RUS,1,4000.00,Skoda,Octavia,1981,HILL


The new dataframe should have all the car numbers that exist in either of the
dataframes

In [84]:
# Outer join on CarNumber: keep every car number found in either frame.
# Columns from the side where a car number is absent are filled with missing values.
fines.merge(owners, on='CarNumber', how='outer')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2015.00,RICHARDSON
1,Y163O8161RUS,2.00,1600.00,Ford,Focus,2004.00,RICHARDSON
2,E432XX77RUS,1.00,6500.00,Toyota,Camry,1987.00,ROSS
3,E432XX77RUS,2.00,13000.00,Toyota,Camry,1986.00,ROSS
4,7184TT36RUS,1.00,2100.00,Ford,Focus,1991.00,MORGAN
...,...,...,...,...,...,...,...
928,O987PMRUS,2.00,1200.00,Ford,Focus,1995.00,
929,P654MBRUS,3.00,550.00,Volkswagen,Passat,1999.00,
930,K123HARUS,,,,,,SAMBER
931,Y456TERUS,,,,,,COLDWELL


The new dataframe should have only the car numbers from the fines dataframe

In [85]:
# Left join on CarNumber: keep every row of fines and add SURNAME where
# owners has a matching car number (missing otherwise).
fines.merge(owners, on='CarNumber', how='left')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,2015,RICHARDSON
1,E432XX77RUS,1,6500.00,Toyota,Camry,1987,ROSS
2,7184TT36RUS,1,2100.00,Ford,Focus,1991,MORGAN
3,X582HE161RUS,2,2000.00,Ford,Focus,2017,BAILEY
4,92918M178RUS,1,5700.00,Ford,Focus,1991,LOPEZ
...,...,...,...,...,...,...,...
925,A123HKRUS,1,5600.00,Ford,Focus,2019,
926,E456TYRUS,2,100.00,Toyota,Corolla,1984,
927,X789CBRUS,1,560.00,Skoda,Octavia,2012,
928,O987PMRUS,2,1200.00,Ford,Focus,1995,


The new dataframe should have only the car numbers from the owners
dataframe

In [86]:
# Right join on CarNumber: keep every car number of owners and add the fines
# columns where fines has a matching car number (missing otherwise).
fines.merge(owners, on='CarNumber', how='right')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2015.00,RICHARDSON
1,Y163O8161RUS,2.00,1600.00,Ford,Focus,2004.00,RICHARDSON
2,E432XX77RUS,1.00,6500.00,Toyota,Camry,1987.00,ROSS
3,E432XX77RUS,2.00,13000.00,Toyota,Camry,1986.00,ROSS
4,7184TT36RUS,1.00,2100.00,Ford,Focus,1991.00,MORGAN
...,...,...,...,...,...,...,...
897,7608EE777RUS,1.00,4000.00,Skoda,Octavia,1981.00,HILL
898,7608EE777RUS,1.00,18000.00,Skoda,Octavia,2008.00,HILL
899,K123HARUS,,,,,,SAMBER
900,Y456TERUS,,,,,,COLDWELL


## Create a pivot table from the fines dataframe

In [87]:
# Pivot fines: one row per (Make, Model) pair, one column per Year. Each cell
# aggregates the matching Fines values with pivot_table's default
# aggregation (the mean); combinations with no data show up as NaN.
fines.pivot_table(values='Fines', index=['Make', 'Model'], columns='Year')

Unnamed: 0_level_0,Year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Ford,Focus,9139.64,4828.57,12299.04,10526.51,5046.15,15223.21,5609.73,4988.92,8670.16,9800.0,...,10043.41,8543.24,5980.0,10086.67,7527.8,13364.51,13694.98,9688.32,10152.94,6863.14
Ford,Mondeo,,,34400.0,6700.0,,8600.0,,,,1100.0,...,2200.0,,,,,,,,,
Skoda,Octavia,400.0,4000.0,1000.0,7047.29,4000.0,34650.0,3000.0,18900.0,145000.0,4947.29,...,8500.0,5200.0,530.0,800.0,3000.0,1900.0,,8200.0,500.0,
Toyota,Camry,,7500.0,800.0,,,,13000.0,6850.0,10766.67,,...,,500.0,8594.59,,14200.0,,,,,
Toyota,Corolla,2100.0,7600.0,,,300.0,,,6400.0,,,...,3400.0,,900.0,4000.0,,,7800.0,2000.0,,3200.0
Volkswagen,Golf,100.0,3800.0,5800.0,200.0,,,4600.0,,18400.0,,...,5000.0,,300.0,24000.0,200.0,6733.33,168000.0,8594.59,,2200.0
Volkswagen,Jetta,,,,9000.0,4000.0,,,,,,...,,,,500.0,,,,46000.0,,
Volkswagen,Passat,3200.0,,,,3000.0,1600.0,500.0,15000.0,14900.0,,...,9900.0,,,1800.0,,7600.0,,,7650.0,2000.0
Volkswagen,Touareg,,,,,,,6300.0,5800.0,,,...,,,,,,,,,,


## Save both the fines and owners dataframes to CSV files without an index

In [88]:
# Write both frames to CSV; index=False leaves the row index out of the files.
for frame, csv_path in ((fines, '../data/fines.csv'), (owners, '../data/owners.csv')):
    frame.to_csv(csv_path, index=False)