In [1]:
import pandas as pd
import datetime as dt
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

In [2]:
#Import Sales file
sales_file = "Sales_2016.csv"
sales_df = pd.read_csv(sales_file)
sales_df.head()

Unnamed: 0,DATE,TRANSACTION_TYPE,TOTAL_TRANSACTIONS,TOTAL_SALES,TOTAL_UNITS,TOTAL_COUPONS,TOTAL_RETURN_SALES,TOTAL_RETURN_UNITS
0,1/1/2016,S,1604,101482.59,8046,769,0.0,0
1,1/1/2016,R,107,0.0,0,0,7631.56,169
2,1/2/2016,S,2669,174602.55,13305,1446,0.0,0
3,1/2/2016,R,214,0.0,0,0,13213.79,359
4,1/3/2016,S,2298,152357.03,11795,1324,0.0,0


In [3]:
#Split Sales and Returns data from Sales file
sale_only_df=sales_df[sales_df["TRANSACTION_TYPE"] == "S"]
return_only_df=sales_df[sales_df["TRANSACTION_TYPE"] == "R"]

In [4]:
#Import Weather csv file
weather_file = "Cleaned_Weather_Data.csv"
weather_df = pd.read_csv(weather_file)
weather_df.head()

Unnamed: 0,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1/1/2016 0:00,42,34,38.0,0.0,0.0,0.0
1,1/2/2016 0:00,40,32,36.0,0.0,0.0,0.0
2,1/3/2016 0:00,45,35,40.0,0.0,0.0,0.0
3,1/4/2016 0:00,36,14,25.0,0.0,0.0,0.0
4,1/5/2016 0:00,29,11,20.0,0.0,0.0,0.0


In [5]:
#Fix the date column and create new_date
weather_df['new_date'] = weather_df['date'].astype('datetime64[ns]')
sale_only_df['new_date'] = sale_only_df['DATE'].astype('datetime64[ns]')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
sale_weather_df = pd.merge(sale_only_df, weather_df, on="new_date")
sale_weather_df.head()

Unnamed: 0,DATE,TRANSACTION_TYPE,TOTAL_TRANSACTIONS,TOTAL_SALES,TOTAL_UNITS,TOTAL_COUPONS,TOTAL_RETURN_SALES,TOTAL_RETURN_UNITS,new_date,date,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,1/1/2016,S,1604,101482.59,8046,769,0.0,0,2016-01-01,1/1/2016 0:00,42,34,38.0,0.0,0.0,0.0
1,1/2/2016,S,2669,174602.55,13305,1446,0.0,0,2016-01-02,1/2/2016 0:00,40,32,36.0,0.0,0.0,0.0
2,1/3/2016,S,2298,152357.03,11795,1324,0.0,0,2016-01-03,1/3/2016 0:00,45,35,40.0,0.0,0.0,0.0
3,1/4/2016,S,1967,109492.91,9075,947,0.0,0,2016-01-04,1/4/2016 0:00,36,14,25.0,0.0,0.0,0.0
4,1/5/2016,S,1796,108041.47,8424,662,0.0,0,2016-01-05,1/5/2016 0:00,29,11,20.0,0.0,0.0,0.0


In [7]:
sales_weather_cols = ["new_date", "TRANSACTION_TYPE", "TOTAL_TRANSACTIONS", "TOTAL_SALES", "TOTAL_UNITS", "TOTAL_COUPONS", "TOTAL_RETURN_SALES", "TOTAL_RETURN_UNITS", "maximum temperature", "minimum temperature", "average temperature", "precipitation", "snow fall", "snow depth"]
sale_weather_new_df= sale_weather_df[sales_weather_cols].copy()

In [8]:
sale_weather_new_df.head()

Unnamed: 0,new_date,TRANSACTION_TYPE,TOTAL_TRANSACTIONS,TOTAL_SALES,TOTAL_UNITS,TOTAL_COUPONS,TOTAL_RETURN_SALES,TOTAL_RETURN_UNITS,maximum temperature,minimum temperature,average temperature,precipitation,snow fall,snow depth
0,2016-01-01,S,1604,101482.59,8046,769,0.0,0,42,34,38.0,0.0,0.0,0.0
1,2016-01-02,S,2669,174602.55,13305,1446,0.0,0,40,32,36.0,0.0,0.0,0.0
2,2016-01-03,S,2298,152357.03,11795,1324,0.0,0,45,35,40.0,0.0,0.0,0.0
3,2016-01-04,S,1967,109492.91,9075,947,0.0,0,36,14,25.0,0.0,0.0,0.0
4,2016-01-05,S,1796,108041.47,8424,662,0.0,0,29,11,20.0,0.0,0.0,0.0


In [9]:
# rename column headers
sale_weather_new_df2 = sale_weather_new_df.rename(columns={"new_date": "date",
                                                  "maximum temperature": "max_temp",
                                                  "minimum temperature": "min_temp",
                                                  "average temperature": "avg_temp",
                                                  "snow fall": "snow_fall",
                                                  "snow depth": "snow_depth"})
# Set index
sale_weather_new_df2.set_index("date", inplace=True)

In [10]:
#Create database connection
connection_string = "root:Bebe6969$@localhost/weather_data"
engine = create_engine(f'mysql://{connection_string}')

In [11]:
# confirm tables
engine.table_names()

['sales_2016', 'weather_2016', 'weather_sales_2016']

In [12]:
sale_weather_new_df2.dtypes

TRANSACTION_TYPE       object
TOTAL_TRANSACTIONS      int64
TOTAL_SALES           float64
TOTAL_UNITS             int64
TOTAL_COUPONS           int64
TOTAL_RETURN_SALES    float64
TOTAL_RETURN_UNITS      int64
max_temp                int64
min_temp                int64
avg_temp              float64
precipitation         float64
snow_fall             float64
snow_depth            float64
dtype: object

In [13]:
# load final sales df into sql database
sale_weather_new_df2.to_sql(name='weather_sales_2016', con=engine, if_exists='append', index=True)