-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_modelling_basics.py
152 lines (130 loc) · 5.55 KB
/
data_modelling_basics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# -*- coding: utf-8 -*-
"""Data Modelling basics.ipynb
Automatically generated by Colaboratory.
"""
# importing the required libraries
import sqlite3
import pandas as pd
# connecting to the database - if it does not exist, will make a new one
conn = sqlite3.connect ('chinook.sqlite')
cur = conn.cursor()
# creating all 11 tables
cur.execute('DROP TABLE IF EXISTS Artist')
cur.execute('''CREATE TABLE Artist (ArtistId INT PRIMARY KEY,
Name TEXT)''')
cur.execute('DROP TABLE IF EXISTS Album')
cur.execute('''CREATE TABLE Album (AlbumId INT PRIMARY KEY,
Title TEXT,
ArtistId INT,
FOREIGN KEY(ArtistId) REFERENCES Artist(ArtistId) ON DELETE SET NULL)''')
cur.execute('DROP TABLE IF EXISTS Playlist')
cur.execute('''CREATE TABLE Playlist (PlaylistId INT PRIMARY KEY,
Name TEXT)''')
cur.execute('DROP TABLE IF EXISTS Genre')
cur.execute('''CREATE TABLE Genre (GenreId INT PRIMARY KEY,
Name TEXT)''')
cur.execute('DROP TABLE IF EXISTS MediaType')
cur.execute('''CREATE TABLE MediaType (MediaTypeId INT PRIMARY KEY,
Name TEXT)''')
cur.execute('DROP TABLE IF EXISTS Track')
cur.execute('''CREATE TABLE Track (TrackId INT PRIMARY KEY,
Name TEXT,
AlbumId INT,
MediaTypeId INT,
GenreId INT,
Composer TEXT,
Milliseconds INT,
Bytes INT,
UnitPrice REAL,
FOREIGN KEY (AlbumId) REFERENCES Album(AlbumId) ON DELETE SET NULL,
FOREIGN KEY (MediaTypeId) REFERENCES MediaType(MediaTypeId) ON DELETE SET NULL,
FOREIGN KEY (GenreId) REFERENCES Genre(GenreId) ON DELETE SET NULL)''')
cur.execute('DROP TABLE IF EXISTS PlaylistTrack')
cur.execute('''CREATE TABLE PlaylistTrack (PlaylistId INT,
TrackId INT,
PRIMARY KEY(PlaylistId, TrackId),
FOREIGN KEY(PlaylistId) REFERENCES Playlist(PlaylistId) ON DELETE CASCADE,
FOREIGN KEY(TrackId) REFERENCES Track(TrackId) ON DELETE CASCADE)''')
cur.execute('DROP TABLE IF EXISTS Employee')
cur.execute('''CREATE TABLE Employee (EmployeeId INT PRIMARY KEY,
LastName TEXT,
FirstName TEXT,
Title TEXT,
ReportsTo INT,
BirthDate DATETIME,
HireDate DATETIME,
Address TEXT,
City TEXT,
State TEXT,
Country TEXT,
PostalCode TEXT,
Phone TEXT,
Fax TEXT,
Email TEXT,
FOREIGN KEY(ReportsTo) REFERENCES Employee(EmployeeId) ON DELETE SET NULL)''')
# In MySQL referencing to own table won't work cause the table it is referencing to is not being created
# so have to alter the table later on to add the foreign key.
# When populating this table, make sure the foreign key value already have its corresponding primary key value
# as they both are in same table
cur.execute('DROP TABLE IF EXISTS Customer')
cur.execute('''CREATE TABLE Customer (CustomerId INT PRIMARY KEY,
FirstName TEXT,
LastName TEXT,
Company TEXT,
Address TEXT,
City TEXT,
State TEXT,
Country TEXT,
PostalCode TEXT,
Phone TEXT,
Fax TEXT,
Email TEXT,
SupportRepId INT,
FOREIGN KEY (SupportRepId) REFERENCES Employee(EmployeeId) ON DELETE SET NULL)''')
cur.execute('DROP TABLE IF EXISTS Invoice')
cur.execute('''CREATE TABLE Invoice (InvoiceId INT PRIMARY KEY,
CustomerId INT,
InvoiceDate DATETIME,
BillingAddress TEXT,
BillingCity TEXT,
BillingState TEXT,
BillingCountry TEXT,
BillingPostalCode TEXT,
Total REAL,
FOREIGN KEY (CustomerId) REFERENCES Customer(CustomerId) ON DELETE SET NULL)''')
cur.execute('DROP TABLE IF EXISTS InvoiceLine')
cur.execute('''CREATE TABLE InvoiceLine (InvoiceLineId INT PRIMARY KEY,
InvoiceId INT,
TrackId INT,
UnitPrice REAL,
Quantity INT,
FOREIGN KEY (InvoiceId) REFERENCES Invoice(InvoiceId) ON DELETE SET NULL,
FOREIGN KEY (TrackId) REFERENCES Track(TrackId) ON DELETE SET NULL)''')
# reading all the csv files and storing them in dataframes
artist_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Artist.csv')
album_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Album.csv')
playlist_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Playlist.csv')
playlisttrack_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/PlaylistTrack.csv')
mediatype_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/MediaType.csv')
genre_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Genre.csv')
employee_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Employee.csv')
customer_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Customer.csv')
invoice_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Invoice.csv')
invoiceline_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/InvoiceLine.csv')
track_df = pd.read_csv('https://raw.githubusercontent.com/w3c/csvw/gh-pages/examples/tests/scenarios/chinook/csv/Track.csv')
# insering the data into the tables
dfs_list = [artist_df, album_df, playlist_df, genre_df, mediatype_df,
track_df, playlisttrack_df, employee_df, customer_df, invoice_df, invoiceline_df]
# getting the list of tables in our database
cur.execute('''SELECT name FROM sqlite_master
WHERE type = 'table' ''')
table_list = []
for i in cur.fetchall():
table_list.append(i[0])
# inserting the corresponsding dataframes into their tables row by row
for df, table in zip(dfs_list, table_list):
col_no = len(df.columns)
for index, rows in df.iterrows():
cur.execute(f'''INSERT INTO {table}
VALUES({', '.join(['?']*col_no)})''', list(rows))
conn.commit()