## import

In [3]:
import pandas as pd
import numpy as np
import requests

## Чтение JSON и настройка отображения float

In [4]:
# Render floats with two decimal places in every DataFrame displayed from here on.
pd.set_option("display.float_format", "{:.2f}".format)

# Load the fines records from the JSON file.
df = pd.read_json("/content/auto.json")

df.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.0,Ford,Focus
1,E432XX77RUS,1,6500.0,Toyota,Camry
2,7184TT36RUS,1,2100.0,Ford,Focus
3,X582HE161RUS,2,2000.0,Ford,Focus
4,92918M178RUS,1,5700.0,Ford,Focus


## Создаем sample из 200 строк

In [5]:
# Draw 200 rows with replacement (fixed seed, so the draw is repeatable)
# and renumber them from zero.
sample = (
    df.sample(n=200, replace=True, random_state=21)
    .reset_index(drop=True)
)

# Stack the sampled rows under the original ones with a fresh 0..N-1 index.
concat_rows = pd.concat(objs=[df, sample], axis=0, ignore_index=True)
concat_rows.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.0,Ford,Focus
1,E432XX77RUS,1,6500.0,Toyota,Camry
2,7184TT36RUS,1,2100.0,Ford,Focus
3,X582HE161RUS,2,2000.0,Ford,Focus
4,92918M178RUS,1,5700.0,Ford,Focus


## Добавляем столбец Year (генерируем годы)

In [6]:
# Re-seed NumPy's global generator so the same years are produced on every run.
np.random.seed(21)

# One random year per row; the upper bound of randint is exclusive,
# so the generated values lie in 1980..2019.
years = np.random.randint(low=1980, high=2020, size=concat_rows.shape[0])
concat_rows["Year"] = years

# Work on an independent copy from here on.
fines = concat_rows.copy(deep=True)
fines.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.0,Ford,Focus,1989
1,E432XX77RUS,1,6500.0,Toyota,Camry,1995
2,7184TT36RUS,1,2100.0,Ford,Focus,1984
3,X582HE161RUS,2,2000.0,Ford,Focus,2015
4,92918M178RUS,1,5700.0,Ford,Focus,2014


## Открываем surname.json. Создаем DataFrame owners

In [7]:
# Load the surnames file.
surnames_data = pd.read_json("/content/surname.json")

# Row 0 is used as the header: assign it to the columns,
# then remove that row from the data and renumber the rest from zero.
surnames_data.columns = surnames_data.iloc[0]
surnames_data = surnames_data.drop(index=0).reset_index(drop=True)
surnames_data.head()

Unnamed: 0,NAME,COUNT,RANK
0,ADAMS,427865,42
1,ALLEN,482607,33
2,ALVAREZ,233983,92
3,ANDERSON,784404,15
4,BAILEY,277845,72


In [8]:
# Strip every character that is not a Latin letter from the surnames.
surnames = surnames_data["NAME"].str.replace(pat=r"[^A-Za-z]", repl="", regex=True)

# One owner per distinct car number: surnames are drawn with replacement
# (fixed seed) and renumbered from zero so they line up with the car numbers.
unique_cars = fines["CarNumber"].unique()
owners_surnames = (
    surnames.sample(n=len(unique_cars), replace=True, random_state=21)
    .reset_index(drop=True)
)

owners = pd.DataFrame({"CarNumber": unique_cars}).assign(SURNAME=owners_surnames)
owners.head()

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,RICHARDSON
1,E432XX77RUS,ROSS
2,7184TT36RUS,MORGAN
3,X582HE161RUS,BAILEY
4,92918M178RUS,LOPEZ


## Добавляем новые строки

In [9]:
# Synthetic fines whose car numbers were not part of the data `owners` was built from.
new_rows = pd.DataFrame({
    "CarNumber": ["TEST1", "TEST2", "TEST3", "TEST4", "TEST5"],
    "Refund": [1, 2, 1, 3, 2],
    "Fines": [1000, 2500, 500, 3200, 1800],
    "Make": ["Ford", "Toyota", "Nissan", "Kia", "BMW"],
    "Model": ["Focus", "Camry", "Altima", "Rio", "X5"],
    "Year": [2001, 2005, 2010, 2015, 2018]
})

# Synthetic owners; presumably their car numbers never occur in `fines`.
extra_owners = pd.DataFrame({
    "CarNumber": ["OWN1", "OWN2", "OWN3"],
    "SURNAME": ["Brown", "Smith", "Johnson"]
})

# Both updates are guarded so that re-running this cell is a no-op.
# Without the guards every re-run would append the TEST rows again,
# drop another 20 owners and append the OWN rows again.
# Assumes the source data contains no TEST*/OWN* car numbers of its own.
if not fines["CarNumber"].isin(new_rows["CarNumber"]).any():
    fines = pd.concat([fines, new_rows], ignore_index=True)

if not owners["CarNumber"].isin(extra_owners["CarNumber"]).any():
    # Drop the last 20 owners so that some fined cars have no owner,
    # then append the owners that have no fines.
    owners = pd.concat([owners.iloc[:-20], extra_owners], ignore_index=True)
owners.tail()

Unnamed: 0,CarNumber,SURNAME
509,O50197197RUS,WRIGHT
510,7608EE777RUS,HILL
511,OWN1,Brown
512,OWN2,Smith
513,OWN3,Johnson


## Join-операции

In [10]:
# Join fines (left) with owners (right) on the car number, once per join type:
#   inner - only car numbers present in both frames
#   outer - every car number from either frame
#   left  - every fines row, with the owner where one exists
#   right - every owner, with the fines where they exist
inner_join = fines.merge(owners, how="inner", on="CarNumber")
outer_join = fines.merge(owners, how="outer", on="CarNumber")
left_join = fines.merge(owners, how="left", on="CarNumber")
right_join = fines.merge(owners, how="right", on="CarNumber")

inner_join.head()

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.0,Ford,Focus,1989,RICHARDSON
1,E432XX77RUS,1,6500.0,Toyota,Camry,1995,ROSS
2,7184TT36RUS,1,2100.0,Ford,Focus,1984,MORGAN
3,X582HE161RUS,2,2000.0,Ford,Focus,2015,BAILEY
4,92918M178RUS,1,5700.0,Ford,Focus,2014,LOPEZ


## Pivot table: сумма штрафов по маркам, моделям и годам

In [11]:
# Total fines per (Make, Model) row and Year column.
pivot = fines.pivot_table(
    index=["Make", "Model"],
    columns="Year",
    values="Fines",
    aggfunc="sum",
)
pivot.head()

Unnamed: 0_level_0,Year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Audi,,,,,,,,,,,,...,,,,,,,,,,
BMW,,,,,,,,,,,,...,,,3000.0,,,8594.59,,,6500.0,
BMW,X5,,,,,,,,,,,...,,,,,,,,,1800.0,
Ford,Focus,62394.59,395589.17,140383.76,63100.0,111294.59,189583.76,88994.59,121900.0,95989.17,115500.0,...,120183.76,86689.17,112700.0,145494.59,158894.59,203694.59,72594.59,272200.0,285194.59,101100.0
Ford,Mondeo,,,,,,,,,,8600.0,...,,,34400.0,,,,46200.0,,,


## Сохранение CSV

In [12]:
# Write both frames to CSV in the working directory, without the index column.
fines.to_csv(path_or_buf="fines.csv", index=False)
owners.to_csv(path_or_buf="owners.csv", index=False)