# Shipping Company database

In this project I created a database for a Polish shipping company. I assume that the company started operating at the beginning of 2012. In this notebook I generate the data for the database and upload it to a MySQL server. The data cover the period from the beginning of 2012 to mid-2020.

In [65]:
import datetime
import getpass
import os
import random
import string

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

# Load the pools of surnames and first names. Every file is read with pandas
# and flattened row by row into a plain Python list.
# NOTE(review): pd.read_csv uses the first line of each file as a header by default,
# so that line never reaches the pool - confirm the files really start with a header.
nazwiska = pd.read_csv('nazwiska.txt').values.ravel().tolist()
imiona_m = pd.read_csv('first-m.txt').values.ravel().tolist()
imiona_f = pd.read_csv('first-f.txt').values.ravel().tolist()

# Random generation of all names

# Male: first names are drawn with replacement, surnames without replacement
Imiona_M = random.choices(imiona_m, k=93)
Nazwiska_M = random.sample(nazwiska, k=93)

# Feminine surname forms: only a trailing -ski / -cki / -dzki turns into -ska / -cka / -dzka.
# Replacing the pattern anywhere in the string would also rewrite surnames that
# merely contain it in the middle.
Nazwiska_3 = [
    sub[:-1] + 'a' if sub.endswith(('ski', 'cki', 'dzki')) else sub
    for sub in nazwiska
]

# Female
Imiona_D = random.choices(imiona_f, k=93)
Nazwiska_D = random.sample(Nazwiska_3, k=93)


## Generation of Employees table

This table contains the data of all the people working for the company: unique employee ID, first name, last name, date of birth, educational background, date of hiring, job title, salary, phone number, bank account number, city and country. Drivers additionally have their driving licence category and the ID of their default vehicle.

In [66]:
# Generation of drivers data
##############################################################

# Driver ID - WorkerID is the shared pool of unique IDs; later cells take
# further slices of it for office employees and clients
pula_id = list(np.linspace(10000,20000,10001,dtype=int))
WorkerID = random.sample(pula_id, k=200)  # all workers ID
DriverID = WorkerID[0:20]

# First name and last name (16 male and 4 female drivers)
FirstName_Driver = Imiona_M[0:16] + Imiona_D[0:4]
LastName_Driver = Nazwiska_M[0:16] + Nazwiska_D[0:4]

# Date of birth
start_date = datetime.date(1965, 1, 1)  # Period when drivers could be born
end_date = datetime.date(1995, 1, 1)
days_between_dates = (end_date - start_date).days
BirthDate_Driver = [
    start_date + datetime.timedelta(days=random.randrange(days_between_dates))
    for _ in range(20)
]

# Date of hiring
start_date = datetime.date(2012, 1, 1)  # Period when drivers could be hired
end_date = datetime.date(2020, 6, 6)
days_between_dates = (end_date - start_date).days
# 16 drivers get a random hire date ...
HireDate_Driver = [
    start_date + datetime.timedelta(days=random.randrange(days_between_dates))
    for _ in range(16)
]
# ... and the last 4 are assumed to have worked from the very beginning
HireDate_Driver += [datetime.date(2012, 1, 1)] * 4

# Salary
pula_zarobkow = list(np.linspace(2000, 4600, 14, dtype = int))
Salary_Driver = random.choices(pula_zarobkow, k = 20)

# Default vehicle
pula_id_pojazdow = list(np.linspace(1, 20, 20, dtype = int))
DefaultVehicle = random.sample(pula_id_pojazdow, k = 20)  # no repetitions

# Driving licence
pula_driving_licence = ["C + E", "C1 + E"]
DrivingLicence = random.choices(pula_driving_licence, k = 20)

# Phone number
pula_nr_telefonu = list(np.linspace(700000001,800000000,5000, dtype = int))
PhoneNumber = random.sample(pula_nr_telefonu, k = 205)  # pool of all phone numbers - no repetitions
PhoneNumber_Driver = PhoneNumber[0:20]

# Country
Country_Driver = ['Polska'] * 20

# City - 'Wrocław' is listed many times on purpose, which makes it the most likely draw
pula_miast = ['Wrocław', 'Wrocław', 'Wrocław', 'Wrocław', 'Wrocław', 'Wrocław', 'Wrocław', 'Chrzanów', 'Wrocław', 'Wrocław', 'Wrocław', "Bielany Wrocławskie", "Żórawina", "Siechnice", "Domasław"]
City_Driver = random.choices(pula_miast, k = 20)

# Bank account: 26 random digits kept as a string so leading zeros survive.
# random.choices draws every digit independently; random.sample(string.digits, k=10)
# could only produce permutations of 0-9 (each digit exactly once per 10-digit chunk).
BankAccount_Driver = [''.join(random.choices(string.digits, k=26)) for _ in range(20)]

# Creating DataFrame (column order matters: rows are inserted into SQL positionally)
Drivers = pd.DataFrame(data = {
    'EmployeeID': DriverID,
    'FirstName': FirstName_Driver,
    'LastName': LastName_Driver,
    'BirthDate': BirthDate_Driver,
    'EducationalBackground': ['Wykształcenie średnie']*20,
    'JobTitle': ['Kierowca']*20,
    'HireDate': HireDate_Driver,
    'Salary': Salary_Driver,
    'DefaultVehicle': DefaultVehicle,
    'DrivingLicence': DrivingLicence,
    'PhoneNumber': PhoneNumber_Driver,
    'Country': Country_Driver,
    'City': City_Driver,
    'BankAccountNumber': BankAccount_Driver,
})
Drivers

Unnamed: 0,EmployeeID,FirstName,LastName,BirthDate,EducationalBackground,JobTitle,HireDate,Salary,DefaultVehicle,DrivingLicence,PhoneNumber,Country,City,BankAccountNumber
0,18961,Mariusz,Wiśniewski,1979-08-21,Wykształcenie średnie,Kierowca,2016-08-03,4200,6,C1 + E,783496699,Polska,Bielany Wrocławskie,31742805697129864503253890
1,14788,Damian,Makowski,1979-04-01,Wykształcenie średnie,Kierowca,2014-05-27,3000,17,C1 + E,742648530,Polska,Siechnice,89534216077259148360692805
2,19152,Kazimierz,Urbański,1992-07-08,Wykształcenie średnie,Kierowca,2019-04-09,2800,5,C1 + E,773434687,Polska,Wrocław,50348269711986254730982056
3,14295,Kamil,Kaczmarek,1972-07-25,Wykształcenie średnie,Kierowca,2019-06-02,2800,7,C + E,708981797,Polska,Wrocław,86045123975673012498276814
4,15421,Mateusz,Witkowski,1980-06-28,Wykształcenie średnie,Kierowca,2018-07-26,3400,16,C1 + E,709361873,Polska,Chrzanów,58416097236045189273073296
5,13042,Rafał,Szczepański,1992-12-02,Wykształcenie średnie,Kierowca,2015-01-07,3800,10,C1 + E,773514703,Polska,Wrocław,59768410328037654129861735
6,18467,Marian,Stępień,1989-03-15,Wykształcenie średnie,Kierowca,2014-06-11,4400,3,C1 + E,756691338,Polska,Wrocław,58267309148940126735851397
7,16527,Tomasz,Adamski,1972-06-30,Wykształcenie średnie,Kierowca,2019-11-30,4400,15,C1 + E,728845769,Polska,Wrocław,21085967436483021795138967
8,17951,Mateusz,Jaworski,1968-07-20,Wykształcenie średnie,Kierowca,2019-10-02,4000,20,C1 + E,764592918,Polska,Wrocław,62078951435923078416619437
9,17220,Roman,Michalak,1976-06-14,Wykształcenie średnie,Kierowca,2019-07-07,2400,8,C1 + E,713802761,Polska,Chrzanów,38624059714509372186401659


In [67]:
# Generation of administration employees data
###################################################################################################

# ID
EmployeeID = WorkerID[90:110]

# First name and last name (5 men and 15 women)
FirstName_Administration = Imiona_M[60:65] + Imiona_D[30:45]
LastName_Administration = Nazwiska_M[60:65] + Nazwiska_D[30:45]

# Date of birth
start_date = datetime.date(1965, 1, 1)
end_date = datetime.date(1995, 1, 1)
days_between_dates = (end_date - start_date).days
BirthDate_Administration = [
    start_date + datetime.timedelta(days=random.randrange(days_between_dates))
    for _ in range(20)
]

# Educational Background
pula_poziomu_wyksztalcenia = ["Wykształcenie średnie", "Wykształcenie wyższe"]
EducationalBackground = random.choices(pula_poziomu_wyksztalcenia, k = 20)

# Job title: the 15 positions below are shuffled, then 5 fixed positions are appended;
# those last 5 rows are the employees hired on the opening day (see hire dates)
stanowiska = ["Księgowy"]*5 + ["Główny księgowy"] + ["Pracownik sekretariatu"]*2 + ["Członek zarządu"]*2 + ["Archiwista"] + ["Asystent ds. transportu"]*3 + ["Informatyk"]
JobTitle = random.sample(stanowiska, k = 15)
JobTitle = JobTitle + ["Księgowy","Pracownik sekretariatu","Członek zarządu","Manager ds. transportu","Informatyk"]

# Date of hiring
start_date = datetime.date(2012, 1, 1)
end_date = datetime.date(2020, 6, 6)
days_between_dates = (end_date - start_date).days
# 15 employees get a random hire date ...
HireDate_Administration = [
    start_date + datetime.timedelta(days=random.randrange(days_between_dates))
    for _ in range(15)
]
# ... and the last 5 are assumed to have worked from the very beginning
HireDate_Administration += [datetime.date(2012, 1, 1)] * 5

# Salary - fixed per job title; a title missing from the table keeps a salary of 0
pensja_wg_stanowiska = {
    "Księgowy": 4000,
    "Pracownik sekretariatu": 3500,
    "Członek zarządu": 10000,
    "Archiwista": 3000,
    "Manager ds. transportu": 6000,
    "Asystent ds. transportu": 4000,
    "Informatyk": 7000,
    "Główny księgowy": 7000,
}
Salary_Administration = [pensja_wg_stanowiska.get(stanowisko, 0) for stanowisko in JobTitle]

# Phone number
PhoneNumber_Administration = PhoneNumber[90:110]

# Country
Country_Administration = ['Polska'] * 20

# City
City_Administration = random.choices(pula_miast, k = 20)

# Bank account: 26 random digits kept as a string so leading zeros survive.
# random.choices draws every digit independently; random.sample(string.digits, k=10)
# could only produce permutations of 0-9 (each digit exactly once per 10-digit chunk).
bank_administration = [''.join(random.choices(string.digits, k=26)) for _ in range(20)]

# DataFrame - same column order as Drivers; the string 'NULL' marks the driver-only
# columns and is what the Employees insert cell checks for
Administration = pd.DataFrame(data={
    'EmployeeID': EmployeeID,
    'FirstName': FirstName_Administration,
    'LastName': LastName_Administration,
    'BirthDate': BirthDate_Administration,
    'EducationalBackground': EducationalBackground,
    'JobTitle': JobTitle,
    'HireDate': HireDate_Administration,
    'Salary': Salary_Administration,
    'DefaultVehicle': ['NULL']*20,
    'DrivingLicence': ['NULL']*20,
    'PhoneNumber': PhoneNumber_Administration,
    'Country': Country_Administration,
    'City': City_Administration,
    'BankAccountNumber': bank_administration,
})
Administration

Unnamed: 0,EmployeeID,FirstName,LastName,BirthDate,EducationalBackground,JobTitle,HireDate,Salary,DefaultVehicle,DrivingLicence,PhoneNumber,Country,City,BankAccountNumber
0,11772,Zbigniew,Jabłoński,1986-10-02,Wykształcenie średnie,Księgowy,2017-11-14,4000,,,751070214,Polska,Domasław,45289016371304872569284563
1,12232,Tomasz,Ostrowski,1982-12-06,Wykształcenie średnie,Asystent ds. transportu,2018-01-27,4000,,,756791358,Polska,Chrzanów,94021687357649102385852614
2,15605,Roman,Majewski,1965-08-14,Wykształcenie średnie,Archiwista,2017-03-03,3000,,,735347070,Polska,Wrocław,61720534983267495018176385
3,14284,Waldemar,Kołodziej,1991-08-26,Wykształcenie średnie,Pracownik sekretariatu,2013-06-02,3500,,,710582117,Polska,Wrocław,90183527648241360975965328
4,11153,Przemysław,Głowacki,1991-08-25,Wykształcenie wyższe,Asystent ds. transportu,2017-07-18,4000,,,770094019,Polska,Wrocław,60491358272946710835739528
5,11604,Ewelina,Wróbel,1992-03-02,Wykształcenie wyższe,Księgowy,2012-10-14,4000,,,759631926,Polska,Wrocław,65471328095076281349078942
6,10545,Zofia,Krajewska,1991-09-20,Wykształcenie wyższe,Główny księgowy,2019-12-11,7000,,,746369274,Polska,Wrocław,47831509625486910237305296
7,11023,Teresa,Lis,1986-10-27,Wykształcenie średnie,Księgowy,2018-12-12,4000,,,796879375,Polska,Wrocław,80317954625937024618628157
8,17833,Urszula,Szymczak,1973-04-06,Wykształcenie wyższe,Pracownik sekretariatu,2016-12-16,3500,,,767113423,Polska,Wrocław,13589706246348591072193286
9,15027,Beata,Sikorska,1991-01-04,Wykształcenie wyższe,Członek zarządu,2014-09-29,10000,,,748629726,Polska,Żórawina,25183079466738049521395216


In [68]:
# Connecting two dataframes to create one Employees table
###################################################################################################

# ignore_index=True renumbers the rows 0..39; without it both halves keep their own
# 0..19 labels and every index label would appear twice in Employees
Employees = pd.concat([Drivers, Administration], ignore_index=True)
Employees

Unnamed: 0,EmployeeID,FirstName,LastName,BirthDate,EducationalBackground,JobTitle,HireDate,Salary,DefaultVehicle,DrivingLicence,PhoneNumber,Country,City,BankAccountNumber
0,18961,Mariusz,Wiśniewski,1979-08-21,Wykształcenie średnie,Kierowca,2016-08-03,4200,6.0,C1 + E,783496699,Polska,Bielany Wrocławskie,31742805697129864503253890
1,14788,Damian,Makowski,1979-04-01,Wykształcenie średnie,Kierowca,2014-05-27,3000,17.0,C1 + E,742648530,Polska,Siechnice,89534216077259148360692805
2,19152,Kazimierz,Urbański,1992-07-08,Wykształcenie średnie,Kierowca,2019-04-09,2800,5.0,C1 + E,773434687,Polska,Wrocław,50348269711986254730982056
3,14295,Kamil,Kaczmarek,1972-07-25,Wykształcenie średnie,Kierowca,2019-06-02,2800,7.0,C + E,708981797,Polska,Wrocław,86045123975673012498276814
4,15421,Mateusz,Witkowski,1980-06-28,Wykształcenie średnie,Kierowca,2018-07-26,3400,16.0,C1 + E,709361873,Polska,Chrzanów,58416097236045189273073296
5,13042,Rafał,Szczepański,1992-12-02,Wykształcenie średnie,Kierowca,2015-01-07,3800,10.0,C1 + E,773514703,Polska,Wrocław,59768410328037654129861735
6,18467,Marian,Stępień,1989-03-15,Wykształcenie średnie,Kierowca,2014-06-11,4400,3.0,C1 + E,756691338,Polska,Wrocław,58267309148940126735851397
7,16527,Tomasz,Adamski,1972-06-30,Wykształcenie średnie,Kierowca,2019-11-30,4400,15.0,C1 + E,728845769,Polska,Wrocław,21085967436483021795138967
8,17951,Mateusz,Jaworski,1968-07-20,Wykształcenie średnie,Kierowca,2019-10-02,4000,20.0,C1 + E,764592918,Polska,Wrocław,62078951435923078416619437
9,17220,Roman,Michalak,1976-06-14,Wykształcenie średnie,Kierowca,2019-07-07,2400,8.0,C1 + E,713802761,Polska,Chrzanów,38624059714509372186401659


## Generation of Clients table

This table contains information about clients, such as: client ID, first name, last name, phone number and their discount. The regular customers are marked by discount values lower than $1$.

In [69]:
# Client IDs and phone numbers are slices of the shared pools that do not
# overlap with the slices used for employees
ClientID = WorkerID[110:186]
PhoneNumber_Client = PhoneNumber[110:186]

# 28 male and 48 female clients
FirstName_Client = Imiona_M[65:93] + Imiona_D[45:93]
LastName_Client = Nazwiska_M[65:93] + Nazwiska_D[45:93]

# Discount: a price multiplier, 1 means no discount; the pool makes 1 the most likely draw
pula_discount = [1] * 5 + [0.9, 0.8]
Discount = random.choices(pula_discount, k=76)

# DataFrame
kolumny_klientow = {
    'ClientID': ClientID,
    'FirstName': FirstName_Client,
    'LastName': LastName_Client,
    'PhoneNumber': PhoneNumber_Client,
    'Discount': Discount,
}
Clients = pd.DataFrame(data=kolumny_klientow)
Clients

Unnamed: 0,ClientID,FirstName,LastName,PhoneNumber,Discount
0,12700,Jacek,Mazurek,716943389,0.9
1,18260,Czesław,Sikora,738287658,0.8
2,10283,Daniel,Czerwiński,774594919,0.9
3,13482,Mirosław,Wojciechowski,703720745,0.9
4,11494,Waldemar,Kozłowski,752190438,1.0
...,...,...,...,...,...
71,11325,Edyta,Michalska,762472494,1.0
72,19305,Aleksandra,Adamczyk,721784357,1.0
73,12038,Izabela,Marciniak,720024005,1.0
74,10314,Teresa,Głowacka,788997799,1.0


## Generating the Vehicles table

This table contains information about the vehicles owned by the company, such as: vehicle ID, make of the vehicle, model of the vehicle, licence plate number, capacity, last service date and insurance number.

In [70]:
# VehicleID - same order as the drivers' default vehicles, so row i of this table
# is the vehicle of driver i
VehicleID = DefaultVehicle

# Vehicle Make
pula_tirow = ["Mercedes-Benz", "MAN", "Volvo"]
Vehicle_Make = random.choices(pula_tirow, k=20)
pula_mercedes = ["Actros 1851 LS 4x2", "Actros 1845 LS 4x2", "Actros 1848 LSnRL", "Actros 1851 MirrorCam"]
pula_volvo = ["FH 500", "FH 500 4x2 Low Liner", "FH 460 4x2"]
pula_man = ["TGX 26.440 6X2 BLS", "TGX 18.500 LLS-U", "TGX 18.500 4X2 LLS-U", "TGX 18.560 4X2 BLS"]

# Vehicle Model - drawn from the pool that belongs to the vehicle's make
modele_wg_marki = {"Mercedes-Benz": pula_mercedes, "MAN": pula_man, "Volvo": pula_volvo}
Vehicle_Model = [random.choice(modele_wg_marki[marka]) for marka in Vehicle_Make]

# Licence Plate: prefix + four-digit number (no repetitions) + letter
pula_LP1 = ["DWR", "DW"]
LP_1 = random.choices(pula_LP1, k=20)
pula_LP2 = list(np.linspace(1000,9999,600,dtype=int))
LP_2 = random.sample(pula_LP2, k=20)
pula_LP3 = list("ABCDEFGHIJKLMNOP")
LP_3 = random.choices(pula_LP3, k=20)

Tablica = [i + str(j) + k for i, j, k in zip(LP_1, LP_2, LP_3)]

# Capacity
pula_capacity = ["120m3", "86m3", "60m3"]
Capacity = random.choices(pula_capacity, k=20)

# Last Service Date
start_date = datetime.date(2019, 7, 1)
end_date = datetime.date(2020, 6, 1)
days_between_dates = (end_date - start_date).days
Service_Date = [
    start_date + datetime.timedelta(days=random.randrange(days_between_dates))
    for _ in range(20)
]

# Insurance Number: 10 random digits kept as a string so leading zeros survive.
# random.choices draws every digit independently; random.sample(string.digits, k=10)
# could only produce permutations of 0-9 (each digit exactly once).
polisa = [''.join(random.choices(string.digits, k=10)) for _ in range(20)]

# DataFrame (column order matters: rows are inserted into SQL positionally)
Vehicles = pd.DataFrame(data={
    'VehicleID': VehicleID,
    'VehicleMake': Vehicle_Make,
    'VehicleModel': Vehicle_Model,
    'LicencePlate': Tablica,
    'Capacity': Capacity,
    'LastServiceDate': Service_Date,
    'InsuranceNumber': polisa,
})
Vehicles

Unnamed: 0,VehicleID,VehicleMake,VehicleModel,LicencePlate,Capacity,LastServiceDate,InsuranceNumber
0,6,Volvo,FH 460 4x2,DW5627H,86m3,2019-12-04,3942508176
1,17,Mercedes-Benz,Actros 1848 LSnRL,DWR4560P,120m3,2019-12-29,3195648027
2,5,Volvo,FH 460 4x2,DWR8932D,86m3,2020-05-29,3526987401
3,7,Volvo,FH 500,DWR4470K,86m3,2019-12-15,3425678019
4,16,Mercedes-Benz,Actros 1851 LS 4x2,DWR6979D,120m3,2019-07-11,2073948615
5,10,Volvo,FH 500,DW2231G,120m3,2019-08-12,5876029413
6,3,MAN,TGX 18.560 4X2 BLS,DWR1555J,120m3,2020-03-29,4128537690
7,15,Mercedes-Benz,Actros 1848 LSnRL,DW2802M,60m3,2019-10-13,3819065274
8,20,Mercedes-Benz,Actros 1851 MirrorCam,DWR9458N,60m3,2019-08-14,687453912
9,8,MAN,TGX 18.500 LLS-U,DW2141I,60m3,2020-02-12,4508371629


## Generating the Contractors table

In the Contractors table we can find the information about the companies we work with (buying fuel, office supplies, office cleaning, insurance). It contains information about: contractor ID, contractor name, phone number and city.

In [71]:
# ID - four distinct numbers drawn from 1..10
pula_id_kontrahentow = list(np.linspace(1, 10, 10, dtype = int))
ContractorID = random.sample(pula_id_kontrahentow, k = 4)

# Contractor Name
ContractorName = ["Kopciuszek-Pol", "ExtraPaliwex", "Papier i Pióro", "PZU"]

# Phone Number - the last four numbers of the shared phone pool
PhoneNumber_Contractors = PhoneNumber[201:205]

# City - three random cities; the last contractor is always based in Wrocław
City_Contractors = random.choices(pula_miast, k = 3) + ["Wrocław"]

# DataFrame
kolumny_kontrahentow = {
    'ContractorID': ContractorID,
    'ContractorName': ContractorName,
    'PhoneNumber': PhoneNumber_Contractors,
    'City': City_Contractors,
}
Contractors = pd.DataFrame(data=kolumny_kontrahentow)
Contractors

Unnamed: 0,ContractorID,ContractorName,PhoneNumber,City
0,3,Kopciuszek-Pol,707981597,Żórawina
1,5,ExtraPaliwex,761152230,Wrocław
2,7,Papier i Pióro,727985597,Domasław
3,1,PZU,737087418,Wrocław


## Generating Commissions table

This table registers the monthly payments for commissioned work. Each record has its own ID, the contractor's ID and name, the date of commission, the maturity date and the price. The prices of fuel and insurance depend on the number of vehicles the company possesses at the time.

In [72]:
# CommissionID
pula_id_comm = list(np.linspace(1000,2000,1001,dtype=int))
ilosc_miesiecy = 12*8+6  # number of months covered by the data
ilosc_transakcji = 4*ilosc_miesiecy  # each of the 4 contractors is paid every month since the company opened
CommissionID = random.sample(pula_id_comm, k = ilosc_transakcji)

# ContractorID, Commission Date, Contractor Name
# NOTE: ContractorID and ContractorName are rebound here from one entry per contractor
# to one entry per commission; the outgoing-transactions cell relies on the long lists.
CommissionDate = []
ContractorName = []
ContractorID = []
date1 = datetime.date(2012, 1, 1)
date2 = datetime.date(2012, 1, 2)
# The contractors in rows 0 and 3 are billed from date1, those in rows 1 and 2 from date2
for wiersz, pierwszy_termin in enumerate((date1, date2, date2, date1)):
    kontrahent = Contractors.loc[wiersz]
    for i in range(ilosc_miesiecy):
        CommissionDate.append(pierwszy_termin + relativedelta(months=i))
        ContractorID.append(kontrahent['ContractorID'])
        ContractorName.append(kontrahent['ContractorName'])

# Price
# Number of drivers hired before the first day of the following month, for every month.
# The cut-off is computed directly for each month, so the last month is counted like
# all the others instead of reusing the previous month's value.
num_hired_monthly = [
    sum(zatrudniony < date1 + relativedelta(months=i + 1) for zatrudniony in HireDate_Driver)
    for i in range(ilosc_miesiecy)
]

# Rows 0 and 2 are charged a flat monthly price; rows 1 and 3 are charged per hired driver
CommissionPrice = [3000]*ilosc_miesiecy + [x*8775 for x in num_hired_monthly] + [250]*ilosc_miesiecy + [x*250 for x in num_hired_monthly]

# Maturity Date - three days after the commission date
MaturityDate = [x+relativedelta(days=3) for x in CommissionDate]

# DataFrame
Commissions = pd.DataFrame(data={
    'CommissionID': CommissionID,
    'ContractorID': ContractorID,
    'ContractorName': ContractorName,
    'CommissionDate': CommissionDate,
    'CommissionPrice': CommissionPrice,
    'MaturityDate': MaturityDate,
})
Commissions = Commissions.sort_values(by = 'CommissionDate')
Commissions

Unnamed: 0,CommissionID,ContractorID,ContractorName,CommissionDate,CommissionPrice,MaturityDate
0,1496,3,Kopciuszek-Pol,2012-01-01,3000,2012-01-04
306,1722,1,PZU,2012-01-01,1000,2012-01-04
204,1328,7,Papier i Pióro,2012-01-02,250,2012-01-05
102,1923,5,ExtraPaliwex,2012-01-02,35100,2012-01-05
1,1149,3,Kopciuszek-Pol,2012-02-01,3000,2012-02-04
...,...,...,...,...,...,...
202,1322,5,ExtraPaliwex,2020-05-02,175500,2020-05-05
407,1136,1,PZU,2020-06-01,5000,2020-06-04
101,1803,3,Kopciuszek-Pol,2020-06-01,3000,2020-06-04
305,1284,7,Papier i Pióro,2020-06-02,250,2020-06-05


## Generating Order details table

This table contains detailed information about all orders taken during the years the company was active. Each record has information about: order ID, assigned driver ID, client ID, vehicle ID, weight of the order, pickup address, delivery address, price of delivery, cost of delivery, date of order, date of pickup, expected delivery date and maturity date. 

In [73]:
# Columns that are filled order by order while simulating each driver's schedule
DriverID_ord = []
VehicleID_ord = []
OrderWeight = []
PickupDate = []
ExpectedDeliveryDate = []
PickupAddress = []
DeliveryAddress = []
Price_ord = []
Cost_ord = []

# Possible order weights and their prices, per capacity of the driver's vehicle
cennik = {
    '120m3': {'20t': 1200*8, '22t': 1300*8, '24t': 1400*8},
    '86m3': {'10t': 700*8, '12t': 800*8, '14t': 900*8},
    '60m3': {'5t': 400*8, '7t': 500*8, '8t': 600*8},
}
koniec_danych = datetime.date(2020,6,6)
kolumny_harmonogramu = (PickupDate, ExpectedDeliveryDate, DriverID_ord, VehicleID_ord, OrderWeight, Price_ord, Cost_ord)

for i in range(20):
    zatrudniony = HireDate_Driver[i]
    num_of_days = (koniec_danych - zatrudniony).days
    j = 0       # day offset from the hire date at which the next slot starts
    urlop = 1   # slot counter; every third slot is skipped (no order is taken)
    while j < num_of_days:
        if urlop % 3:
            k = random.randint(2,5)  # duration of the transport in days
            PickupDate.append(zatrudniony + relativedelta(days=j))
            ExpectedDeliveryDate.append(zatrudniony + relativedelta(days=j + k))
            DriverID_ord.append(DriverID[i])
            VehicleID_ord.append(DefaultVehicle[i])
            stawki = cennik.get(Capacity[i])
            if stawki:
                waga = random.choice(list(stawki))
                OrderWeight.append(waga)
                Price_ord.append(stawki[waga])
            Cost_ord.append(int(Price_ord[-1]*0.2 + k*200))
        urlop += 1
        # a skipped slot reuses the previous k; +1 is the additional day between orders
        j += k + 1
    # the last order generated for each driver is dropped again
    for kolumna in kolumny_harmonogramu:
        kolumna.pop()

# OrderID - consecutive numbers 1..number of orders
liczba_zamowien = len(DriverID_ord)
OrderID = np.linspace(1, liczba_zamowien, liczba_zamowien, dtype=int)

# OrderDate - assuming that packages are taken in one to three days
OrderDate = [odbior - relativedelta(days=random.randint(1,3)) for odbior in PickupDate]

# MaturityDate - a week after the order was placed
MaturityDate_ord = [zamowiono + relativedelta(days=7) for zamowiono in OrderDate]

# ClientID
ClientID_ord = random.choices(ClientID, k = len(PickupDate))

# Pickup and delivery address - two different cities per order
pula_miast_2 = [
    'Wrocław', 'Wałbrzych', 'Bielany Wrocławskie', 'Jelenia Góra', 'Lublin',
    'Głogów', 'Świdnica', 'Bolesławiec', 'Oleśnica', 'Oława',
    'Świebodzice', 'Kłodzko', 'Jawor', 'Polkowice', 'Nowa Ruda',
    'Złotoryja', 'Strzelin', 'Milicz', 'Sobótka', 'Lwówek Śląski',
    'Kobierzyce', 'Żarów', 'Twardogóra', 'Szklarska Poręba', 'Głuszyca',
]
pula_ulic = [
    'Polna', 'Leśna', 'Słoneczna', 'Krótka', 'Szkolna', 'Ogrodowa', 'Lipowa',
    'Łąkowa', 'Brozowa', 'Kwiatowa', 'Kościelna', 'Sosnowa', 'Zielona', 'Parkowa',
    'Akacjowa', 'Kolejowa', 'Wietrzna', 'Krakowska', 'Katowicka', 'Zimowa', 'Wiosenna',
    'Dębowa', 'Jastrzębia', 'Piaskowa', 'Sportowa', 'Tyniecka', 'Orla',
]
for _ in PickupDate:
    miasta = random.sample(pula_miast_2, k = 2)
    PickupAddress.append(f"{miasta[0]}, ul. {random.choice(pula_ulic)} {random.randint(1,70)}")
    DeliveryAddress.append(f"{miasta[1]}, ul. {random.choice(pula_ulic)} {random.randint(1,70)}")

# DataFrame
kolumny_zamowien = {
    'OrderID': OrderID,
    'ClientID': ClientID_ord,
    'DriverID': DriverID_ord,
    'VehicleID': VehicleID_ord,
    'OrderWeight': OrderWeight,
    'OrderDate': OrderDate,
    'PickupDate': PickupDate,
    'ExpectedDeliveryDate': ExpectedDeliveryDate,
    'PickupAddress': PickupAddress,
    'DeliveryAddress': DeliveryAddress,
    'Price': Price_ord,
    'Cost': Cost_ord,
    'MaturityDate': MaturityDate_ord,
}
OrderDetails = pd.DataFrame(data = kolumny_zamowien).sort_values(by = 'OrderDate')
OrderDetails


Unnamed: 0,OrderID,ClientID,DriverID,VehicleID,OrderWeight,OrderDate,PickupDate,ExpectedDeliveryDate,PickupAddress,DeliveryAddress,Price,Cost,MaturityDate
4088,4089,13285,13601,4,12t,2011-12-29,2012-01-01,2012-01-04,"Bolesławiec, ul. Tyniecka 10","Lublin, ul. Kościelna 26",6400,1880,2012-01-05
3185,3186,13317,13056,1,7t,2011-12-30,2012-01-01,2012-01-06,"Złotoryja, ul. Kolejowa 9","Kobierzyce, ul. Orla 53",4000,1800,2012-01-06
3634,3635,19305,12829,12,12t,2011-12-31,2012-01-01,2012-01-04,"Kłodzko, ul. Zielona 45","Złotoryja, ul. Kościelna 67",6400,1880,2012-01-07
2740,2741,10745,10623,14,24t,2011-12-31,2012-01-01,2012-01-06,"Bielany Wrocławskie, ul. Zielona 10","Twardogóra, ul. Łąkowa 15",11200,3240,2012-01-07
4089,4090,11816,13601,4,14t,2012-01-02,2012-01-05,2012-01-10,"Głuszyca, ul. Akacjowa 53","Strzelin, ul. Piaskowa 14",7200,2440,2012-01-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,587,16664,19152,5,12t,2020-05-27,2020-05-30,2020-06-01,"Żarów, ul. Zielona 51","Nowa Ruda, ul. Krakowska 3",6400,1680,2020-06-03
3633,3634,18227,13056,1,5t,2020-05-28,2020-05-30,2020-06-03,"Bolesławiec, ul. Wietrzna 23","Szklarska Poręba, ul. Dębowa 37",3200,1440,2020-06-04
1522,1523,15454,15410,2,7t,2020-05-28,2020-05-29,2020-05-31,"Lublin, ul. Piaskowa 6","Polkowice, ul. Dębowa 38",4000,1200,2020-06-04
1477,1478,18985,17220,8,5t,2020-05-30,2020-05-31,2020-06-03,"Jelenia Góra, ul. Orla 21","Lwówek Śląski, ul. Krótka 53",3200,1240,2020-06-06


## Generating Orders table

This table contains shortened information about orders along with corresponding transaction ID.

In [74]:
# TransactionID - the incoming transaction of an order reuses the order's number
INTransactionID = OrderID

# DataFrame
kolumny_orders = {
    'OrderID': OrderID,
    'VehicleID': VehicleID_ord,
    'DriverID': DriverID_ord,
    'ClientID': ClientID_ord,
    'INTransactionID': INTransactionID,
}
Orders = pd.DataFrame(data=kolumny_orders)
Orders

Unnamed: 0,OrderID,VehicleID,DriverID,ClientID,INTransactionID
0,1,6,18961,10421,1
1,2,6,18961,15461,2
2,3,6,18961,13104,3
3,4,6,18961,19776,4
4,5,6,18961,12747,5
...,...,...,...,...,...
4549,4550,4,13601,13704,4550
4550,4551,4,13601,19167,4551
4551,4552,4,13601,11803,4552
4552,4553,4,13601,11041,4553


## Generating Incoming transactions table

This table contains information about the transactions corresponding to the payments for orders, such as: transaction ID, type of transaction, order ID, transactor ID (client ID), the amount paid and the date of transaction.

In [75]:
# TransactionType - every incoming transaction is a payment for a transport
TransactionType = ['Opłata za przewóz' for _ in OrderID]

# DataFrame - one row per order; the order's maturity date is used as the transaction date
kolumny_wplat = {
    'INTransactionID': INTransactionID,
    'TransactionType': TransactionType,
    'OrderID': OrderID,
    'Transactor': ClientID_ord,
    'Amount': Price_ord,
    'TransactionDate': MaturityDate_ord,
}
Incoming_Transactions = pd.DataFrame(data=kolumny_wplat).sort_values(by='TransactionDate')
Incoming_Transactions

Unnamed: 0,INTransactionID,TransactionType,OrderID,Transactor,Amount,TransactionDate
4088,4089,Opłata za przewóz,4089,13285,6400,2012-01-05
3185,3186,Opłata za przewóz,3186,13317,4000,2012-01-06
3634,3635,Opłata za przewóz,3635,19305,6400,2012-01-07
2740,2741,Opłata za przewóz,2741,10745,11200,2012-01-07
4089,4090,Opłata za przewóz,4090,11816,7200,2012-01-09
...,...,...,...,...,...,...
586,587,Opłata za przewóz,587,16664,6400,2020-06-03
3633,3634,Opłata za przewóz,3634,18227,3200,2020-06-04
1522,1523,Opłata za przewóz,1523,15454,4000,2020-06-04
1477,1478,Opłata za przewóz,1478,18985,3200,2020-06-06


## Generating Outgoing transactions tables

The two tables below contain information about the outgoing transactions to the employees and to the contractors. The columns in these tables are: transaction ID, type of transaction, transactor ID (employee ID or contractor ID), amount paid and the date of transaction.

In [76]:
# Outgoing transactions - commissions
#########################################################################

# The commission lists hold ilosc_miesiecy entries for each of the four contractors, in
# the row order of the Contractors table; the last contractor is the insurer. The counts
# are derived from ilosc_miesiecy so they stay in step with the Commissions cell.
TransactionType = ["Zlecenie"]*(3*ilosc_miesiecy) + ["Opłata za ubezpieczenie"]*ilosc_miesiecy
Transactor_out = ContractorID
Amount = CommissionPrice
TransactionDate_out = MaturityDate  # commissions are paid on their maturity date

# OUTTransactionID - consecutive numbers 1..number of payments
OUTTransactionID = np.linspace(1,len(Amount),len(Amount), dtype=int)

# DataFrame
Outgoing_Transactions_commissions = pd.DataFrame(data={
    'OUTTransactionID': OUTTransactionID,
    'TransactionType': TransactionType,
    'TransactorID': Transactor_out,
    'Amount': Amount,
    'TransactionDate': TransactionDate_out,
})
Outgoing_Transactions_commissions = Outgoing_Transactions_commissions.sort_values(by = 'TransactionDate')
Outgoing_Transactions_commissions

Unnamed: 0,OUTTransactionID,TransactionType,TransactorID,Amount,TransactionDate
0,1,Zlecenie,3,3000,2012-01-04
306,307,Opłata za ubezpieczenie,1,1000,2012-01-04
204,205,Zlecenie,7,250,2012-01-05
102,103,Zlecenie,5,35100,2012-01-05
1,2,Zlecenie,3,3000,2012-02-04
...,...,...,...,...,...
202,203,Zlecenie,5,175500,2020-05-05
407,408,Opłata za ubezpieczenie,1,5000,2020-06-04
101,102,Zlecenie,3,3000,2020-06-04
305,306,Zlecenie,7,250,2020-06-05


In [77]:
# Outgoing transactions - employees
####################################################################

TransactionType = []
Transactor_out = []
Amount = []
TransactionDate_out = []

koniec_okresu = datetime.date(2020,6,6)

# Salary: the first payment is made on the hire date, the following ones every month
# for as long as the payment date falls before the end of the data period.
for i in range(20):
    # driver i first, then office worker i - this fixes the order of the transaction IDs
    for pracownik, zatrudniony, pensja in (
        (DriverID[i], HireDate_Driver[i], Salary_Driver[i]),
        (EmployeeID[i], HireDate_Administration[i], Salary_Administration[i]),
    ):
        miesiac = 0
        termin = zatrudniony
        while termin < koniec_okresu:
            TransactionDate_out.append(termin)
            TransactionType.append("Wypłata pensji")
            Transactor_out.append(pracownik)
            Amount.append(pensja)
            miesiac += 1
            # Always offset from the hire date: adding one month to the previous payment
            # date would shorten the day for good once a short month is passed
            # (31 Jan -> 29 Feb -> 29 Mar ...), whereas this yields 31 Jan, 29 Feb, 31 Mar.
            termin = zatrudniony + relativedelta(months=miesiac)

# OUTTransactionID - consecutive numbers 1..number of payments
OUTTransactionID = np.linspace(1,len(Amount),len(Amount), dtype=int)

# DataFrame
Outgoing_Transactions_employees = pd.DataFrame(data={
    'OUTTransactionID': OUTTransactionID,
    'TransactionType': TransactionType,
    'TransactorID': Transactor_out,
    'Amount': Amount,
    'TransactionDate': TransactionDate_out,
})
Outgoing_Transactions_employees = Outgoing_Transactions_employees.sort_values(by = 'TransactionDate')
Outgoing_Transactions_employees

Unnamed: 0,OUTTransactionID,TransactionType,TransactorID,Amount,TransactionDate
1378,1379,Wypłata pensji,17767,4000,2012-01-01
1684,1685,Wypłata pensji,13056,3400,2012-01-01
2092,2093,Wypłata pensji,13601,2400,2012-01-01
1582,1583,Wypłata pensji,11790,3500,2012-01-01
1990,1991,Wypłata pensji,12927,6000,2012-01-01
...,...,...,...,...,...
1077,1078,Wypłata pensji,19611,4000,2020-06-02
658,659,Wypłata pensji,17951,4000,2020-06-02
46,47,Wypłata pensji,18961,4200,2020-06-03
233,234,Wypłata pensji,15605,3000,2020-06-03


## Inserting data into database

In [78]:
import mysql.connector

In [79]:
# Open the MySQL connection used by the insert cells below.
#
# No credential is stored in the notebook: the password is read from the
# COMPANY_DB_PASSWORD environment variable and, when that is not set, asked for
# interactively. Host, user and schema can be overridden through the
# environment as well and otherwise keep their previous values.
# NOTE(review): the password that used to be written here should be treated as
# leaked and changed on the server.
import getpass  # only needed for the interactive password fallback

db_password = os.environ.get("COMPANY_DB_PASSWORD")
if db_password is None:
    db_password = getpass.getpass("MySQL password: ")

conn = mysql.connector.connect(
    host=os.environ.get("COMPANY_DB_HOST", "localhost"),
    user=os.environ.get("COMPANY_DB_USER", "root"),
    password=db_password,
    database=os.environ.get("COMPANY_DB_NAME", "company"),
)

In [80]:
# Cursor used by every insert cell below (buffered, dictionary-style rows).
cursor = conn.cursor(
    dictionary=True,
    buffered=True,
)

In [81]:
# Load the Clients frame into the Clients table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form (the same text the
# concatenated statement contained) and MySQL casts it to the column type.
# Positions 0, 3 and 4 used to be written without quotes, where the text NULL
# meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 5  # the statement fills the first five frame columns, as before
null_positions = {0, 3, 4}
sql = "INSERT INTO Clients VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Clients.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [82]:
# Load the Vehicles frame into the Vehicles table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Only position 0 used to be written without quotes, where the
# text NULL meant SQL NULL; that is kept by binding None for it.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 7  # the statement fills the first seven frame columns, as before
null_positions = {0}
sql = "INSERT INTO Vehicles VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Vehicles.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [84]:
# Load the Employees frame into the Employees table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type.
# Positions 0, 7, 8 and 10 used to be written without quotes, and position 9
# was written without quotes exactly when it held 'NULL'. In all of these the
# text NULL therefore meant SQL NULL; binding None keeps that and makes the
# former two-branch statement unnecessary.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 14  # the statement fills the first fourteen frame columns, as before
null_positions = {0, 7, 8, 9, 10}
sql = "INSERT INTO Employees VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Employees.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [85]:
# Load the Contractors frame into the Contractors table with a parameterized
# INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0 and 2 used to be written without quotes, where
# the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 4  # the statement fills the first four frame columns, as before
null_positions = {0, 2}
sql = "INSERT INTO Contractors VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Contractors.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [86]:
# Load the Commissions frame into the Commissions table with a parameterized
# INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0, 1 and 4 used to be written without quotes,
# where the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 6  # the statement fills the first six frame columns, as before
null_positions = {0, 1, 4}
sql = "INSERT INTO Commissions VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Commissions.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [87]:
# Load the OrderDetails frame into the Order_Details table with a parameterized
# INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0-3, 10 and 11 used to be written without quotes,
# where the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 13  # the statement fills the first thirteen frame columns, as before
null_positions = {0, 1, 2, 3, 10, 11}
sql = "INSERT INTO Order_Details VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in OrderDetails.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [88]:
# Load the Incoming_Transactions frame into the Incoming_Transactions table
# with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0, 2 and 4 used to be written without quotes,
# where the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 6  # the statement fills the first six frame columns, as before
null_positions = {0, 2, 4}
sql = "INSERT INTO Incoming_Transactions VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Incoming_Transactions.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [89]:
# Load the Outgoing_Transactions_commissions frame into the
# Out_Transactions_Commissions table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0 and 3 used to be written without quotes, where
# the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 5  # the statement fills the first five frame columns, as before
null_positions = {0, 3}
sql = "INSERT INTO Out_Transactions_Commissions VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Outgoing_Transactions_commissions.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [90]:
# Load the Outgoing_Transactions_employees frame into the
# Out_Transactions_Employees table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so a quote or backslash inside a value can no longer break or alter the
# statement. Each value is still sent as its str() form and MySQL casts it to
# the column type. Positions 0 and 3 used to be written without quotes, where
# the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 5  # the statement fills the first five frame columns, as before
null_positions = {0, 3}
sql = "INSERT INTO Out_Transactions_Employees VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Outgoing_Transactions_employees.itertuples(index=False, name=None):
    params = tuple(
        None if position in null_positions and str(value) == 'NULL' else str(value)
        for position, value in enumerate(record[:column_count])
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()

In [91]:
# Load the Orders frame into the Orders table with a parameterized INSERT.
#
# Values are bound by the driver instead of being concatenated into the SQL
# text, so unexpected characters inside a value can no longer break or alter
# the statement. Each value is still sent as its str() form and MySQL casts it
# to the column type. All five positions used to be written without quotes,
# where the text NULL meant SQL NULL; that is kept by binding None for them.
# itertuples() replaces row[0]-style lookups, which recent pandas versions
# deprecate on label-indexed rows.
column_count = 5  # the statement fills the first five frame columns, as before
sql = "INSERT INTO Orders VALUES (" + ", ".join(["%s"] * column_count) + ")"
for record in Orders.itertuples(index=False, name=None):
    params = tuple(
        None if str(value) == 'NULL' else str(value)
        for value in record[:column_count]
    )
    cursor.execute(sql, params)
# One commit after the loop instead of one per row.
conn.commit()