 # Notebook description

 This notebook reshapes the data from the source files into a format compatible with `Postgres` and then uploads it to the server.

## Notebook configuration

In [1]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
import pandas as pd 
import os
from dotenv import  load_dotenv

In [2]:
# Raw string keeps the Windows backslashes literal instead of relying on unrecognised escape sequences (\P, \d, \.).
load_dotenv(r"D:\Python\devcontainer\.env")  # load the Postgres password from the .env file

True

Create the connection URL and the engine

In [3]:
# Connection URL for the local Postgres server. The password is read from the
# POSTGRES_PASSWORD environment variable (os.getenv yields None when it is unset).
url = URL.create(
    drivername="postgresql+psycopg2",
    username="postgres",
    password=os.getenv("POSTGRES_PASSWORD"),
    host="localhost",
    database="postgres",
)

# Engine built from the URL above; the upload cells pass it as `con`.
engine = create_engine(url)

Helper that lower-cases the column names of a loaded frame

In [26]:
def load_raw_data(file_name):
    """Lower-case the column labels of a DataFrame in place and return it.

    Parameters
    ----------
    file_name : pandas.DataFrame
        Despite the name, this is an already-loaded frame, not a path. Its
        ``columns`` attribute is overwritten, so the caller's object changes.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, so the call can be displayed
        or chained.

    Notes
    -----
    Labels that are not strings (for example integer labels) are kept
    unchanged instead of raising ``AttributeError``.
    """
    file_name.columns = [
        # Only strings have .lower(); leave any other label type as it is.
        column.lower() if isinstance(column, str) else column
        for column in file_name.columns
    ]
    return file_name

Loading the individual files into frames

In [27]:
# Read each source CSV (paths are relative to the working directory) into its own frame.
df_aircraft = pd.read_csv("aircraft.csv")
df_airport = pd.read_csv("airport_list.csv")
df_weather = pd.read_csv("airport_weather.csv")
df_flight = pd.read_csv("flight.csv")

In [28]:
# Lower-cases df_aircraft's column names in place; the returned frame becomes the cell output.
load_raw_data(df_aircraft)

Unnamed: 0,manufacture_year,tail_num,number_of_seats
0,1944,N54514,0.0
1,1945,N1651M,0.0
2,1953,N100CE,0.0
3,1953,N141FL,0.0
4,1953,N151FL,0.0
...,...,...,...
7378,2019,N14011,337.0
7379,2019,N16008,337.0
7380,2019,N16009,337.0
7381,2019,N2250U,276.0


In [29]:
# Show one randomly chosen row of df_aircraft as a quick check.
df_aircraft.sample()

Unnamed: 0,manufacture_year,tail_num,number_of_seats
382,1990,N967DL,149.0


In [30]:
# Lower-cases df_airport's column names in place; the returned frame becomes the cell output.
load_raw_data(df_airport)

Unnamed: 0,origin_airport_id,display_airport_name,origin_city_name,name
0,11638,Fresno Air Terminal,"Fresno, CA","FRESNO YOSEMITE INTERNATIONAL, CA US"
1,13342,General Mitchell Field,"Milwaukee, WI","MILWAUKEE MITCHELL AIRPORT, WI US"
2,13244,Memphis International,"Memphis, TN","MEMPHIS INTERNATIONAL AIRPORT, TN US"
3,15096,Syracuse Hancock International,"Syracuse, NY","SYRACUSE HANCOCK INTERNATIONAL AIRPORT, NY US"
4,10397,Atlanta Municipal,"Atlanta, GA",ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...
...,...,...,...,...
92,13198,Kansas City International,"Kansas City, MO","KANSAS CITY INTERNATIONAL AIRPORT, MO US"
93,10423,Austin - Bergstrom International,"Austin, TX","AUSTIN BERGSTROM INTERNATIONAL AIRPORT, TX US"
94,15370,Tulsa International,"Tulsa, OK","OKLAHOMA CITY WILL ROGERS WORLD AIRPORT, OK US"
95,13303,Miami International,"Miami, FL","MIAMI INTERNATIONAL AIRPORT, FL US"


In [31]:
# Show one randomly chosen row of df_airport as a quick check.
df_airport.sample()

Unnamed: 0,origin_airport_id,display_airport_name,origin_city_name,name
87,14262,Palm Springs International,"Palm Springs, CA","DESERT RESORTS REGIONAL AIRPORT, CA US"


In [32]:
# Lower-cases df_weather's column names in place; the returned frame becomes the cell output.
load_raw_data(df_weather)

Unnamed: 0,wt18,station,name,date,awnd,prcp,snow,snwd,tavg,tmax,...,pgtm,wt10,wesd,sn32,sx32,psun,tsun,tobs,wt07,wt11
0,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-01,4.70,0.14,0.0,0.0,64.0,66.0,...,,,,,,,,,,
1,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-02,4.92,0.57,0.0,0.0,56.0,59.0,...,,,,,,,,,,
2,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-03,5.37,0.15,0.0,0.0,52.0,55.0,...,,,,,,,,,,
3,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-04,12.08,1.44,0.0,0.0,56.0,66.0,...,,,,,,,,,,
4,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-05,13.42,0.00,0.0,0.0,49.0,59.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46221,,USW00014762,"PITTSBURGH ALLEGHENY CO AIRPORT, PA US",2020-03-27,3.58,0.21,,,,59.0,...,146.0,,,,,,,,,
46222,,USW00014762,"PITTSBURGH ALLEGHENY CO AIRPORT, PA US",2020-03-28,6.93,1.29,,,,77.0,...,1535.0,,,,,,,,,
46223,,USW00014762,"PITTSBURGH ALLEGHENY CO AIRPORT, PA US",2020-03-29,16.55,0.02,,,,78.0,...,1408.0,,,,,,,,,
46224,,USW00014762,"PITTSBURGH ALLEGHENY CO AIRPORT, PA US",2020-03-30,13.42,0.00,,,,57.0,...,817.0,,,,,,,,,


In [33]:
# Show one randomly chosen row of df_weather as a quick check.
df_weather.sample()

Unnamed: 0,wt18,station,name,date,awnd,prcp,snow,snwd,tavg,tmax,...,pgtm,wt10,wesd,sn32,sx32,psun,tsun,tobs,wt07,wt11
30079,,USW00022521,"HONOLULU INTERNATIONAL AIRPORT, HI US",2019-10-06,6.49,0.0,,,79.0,86.0,...,,,,,,,,,,


In [34]:
# Lower-cases df_flight's column names in place; the returned frame becomes the cell output.
load_raw_data(df_flight)

Unnamed: 0,month,day_of_month,day_of_week,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,dest_airport_id,crs_dep_time,dep_time,...,crs_elapsed_time,actual_elapsed_time,distance,distance_group,year,carrier_delay,weather_delay,nas_delay,security_delay,late_aircraft_delay
0,1,20,7,WN,N204WN,682,10397,11292,605,602.0,...,205,204.0,1199,5,2019,,,,,
1,1,20,7,WN,N8682B,2622,10397,11292,2120,2114.0,...,210,205.0,1199,5,2019,,,,,
2,1,20,7,WN,N717SA,2939,10397,11292,1800,1807.0,...,210,220.0,1199,5,2019,4.0,0.0,10.0,0.0,3.0
3,1,20,7,WN,N709SW,3848,10397,11292,1355,1354.0,...,205,204.0,1199,5,2019,,,,,
4,1,20,7,WN,N7864B,1352,10397,11697,1125,1125.0,...,120,124.0,581,3,2019,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1386115,3,26,4,DL,N350DN,1982,13303,12953,1956,1944.0,...,183,169.0,1096,5,2020,,,,,
1386116,3,26,4,DL,N908DE,1987,13303,10397,1120,1117.0,...,121,109.0,594,3,2020,,,,,
1386117,3,26,4,DL,,1998,13303,10397,1817,,...,125,,594,3,2020,,,,,
1386118,3,26,4,DL,N352NW,2025,13303,10397,1937,1928.0,...,123,107.0,594,3,2020,,,,,


In [35]:
# Show one randomly chosen row of df_flight as a quick check.
df_flight.sample()

Unnamed: 0,month,day_of_month,day_of_week,op_unique_carrier,tail_num,op_carrier_fl_num,origin_airport_id,dest_airport_id,crs_dep_time,dep_time,...,crs_elapsed_time,actual_elapsed_time,distance,distance_group,year,carrier_delay,weather_delay,nas_delay,security_delay,late_aircraft_delay
1189474,2,17,7,B6,N969JT,1415,12478,14771,2000,1958.0,...,412,406.0,2586,11,2019,,,,,


### Exporting data to a database

In [14]:
def export_table_to_df(table_name, df, con=None):
    """Append the rows of ``df`` to the SQL table ``table_name``.

    Parameters
    ----------
    table_name : str
        Name of the target table.
    df : pandas.DataFrame
        Frame whose rows are written; its index is not exported
        (``index=False``).
    con : connectable, optional
        Connection handed to ``DataFrame.to_sql``. When omitted, the
        notebook-level ``engine`` is used, which matches the previous
        behaviour.

    Notes
    -----
    ``if_exists='append'`` inserts into a table that already exists, so
    calling this twice with the same frame writes its rows twice.
    """
    if con is None:
        # Fall back to the engine created in the configuration section.
        con = engine
    df.to_sql(name=table_name, con=con, if_exists='append', index=False)

 ## Uploading data

 ### Uploading `df_aircraft` to the `aircraft` table.

In [43]:
# Appends df_aircraft's rows (index excluded) to the `aircraft` table; because of
# if_exists='append', re-running this cell inserts the same rows again.
df_aircraft.to_sql(name='aircraft', con=engine, if_exists='append', index=False)

383

 ### Uploading `df_weather` to the `airport_weather` table.

In [42]:
# Appends df_weather's rows (index excluded) to the `airport_weather` table; because of
# if_exists='append', re-running this cell inserts the same rows again.
df_weather.to_sql(name='airport_weather', con=engine, if_exists='append', index=False)

686

 ### Uploading `df_flight` to the `flight` table.

In [44]:
# Appends df_flight's rows (index excluded) to the `flight` table; because of
# if_exists='append', re-running this cell inserts the same rows again.
df_flight.to_sql(name='flight', con=engine, if_exists='append', index=False)

120

 ### Uploading `df_airport` to the `airport_list` table.

In [41]:
# Appends df_airport's rows (index excluded) to the `airport_list` table; because of
# if_exists='append', re-running this cell inserts the same rows again.
df_airport.to_sql(name='airport_list', con=engine, if_exists='append', index=False)

97