In [1]:
### Setup 

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float, Date
from sqlalchemy.orm import Session 

In [2]:
#### Store CSV into a DF

csv_file = (r"C:\Users\rnorris\Documents\grocery.data\Groceries_dataset.csv")
grocery_data_df = pd.read_csv(csv_file)
grocery_data_df.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [3]:
### Bring in another CSV file and turn into a DF

day_sell_csv = (r"C:\Users\rnorris\Documents\GitHub\etl-project\Resources\day_sell.csv")
day_sell_df = pd.read_csv(day_sell_csv)
day_sell_df.head()

Unnamed: 0,Date,zn,sb,tax,marza
0,24.12.2017,1334.95,1903.82,284.7,284.8
1,27.12.2017,2154.4,2953.43,420.77,378.62
2,28.12.2017,2120.66,2914.2,397.22,396.14
3,29.12.2017,2280.9,3152.23,409.6,462.54
4,30.12.2017,3463.87,4823.36,629.44,730.5


In [4]:
## Change date format

day_sell_df['Date'] = pd.to_datetime(day_sell_df.Date)

day_sell_df.head()

Unnamed: 0,Date,zn,sb,tax,marza
0,2017-12-24,1334.95,1903.82,284.7,284.8
1,2017-12-27,2154.4,2953.43,420.77,378.62
2,2017-12-28,2120.66,2914.2,397.22,396.14
3,2017-12-29,2280.9,3152.23,409.6,462.54
4,2017-12-30,3463.87,4823.36,629.44,730.5


In [5]:
### Rename column titles

# day_sell_df = day_sell_df.rename(columns = {'Date': 'Date of Sale','zn': 'Net Purchase per day (PLN)', 'sb': 'Gross Sale per day (PLN)', 'tax': 'Tax of Sale per day (PLN)', 'marza': 'Margin per day (PLN)'})
day_sell_df.index.name = 'Index'
day_sell_df.head()
# Day_Sell = Day_Sell.rename(columns = {'Date': 'Date of Sale','zn': 'Net Purchase per day (PLN)', 'sb': 'Gross Sale per day (PLN)', 'tax': 'Tax of Sale per day (PLN)', 'marza': 'Margin per day (PLN)'})

Unnamed: 0_level_0,Date,zn,sb,tax,marza
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-12-24,1334.95,1903.82,284.7,284.8
1,2017-12-27,2154.4,2953.43,420.77,378.62
2,2017-12-28,2120.66,2914.2,397.22,396.14
3,2017-12-29,2280.9,3152.23,409.6,462.54
4,2017-12-30,3463.87,4823.36,629.44,730.5


In [6]:
### Look up Day with highest Gross Sales 

# day_sell_df['Gross Sale per day (PLN)'].head(10)

In [7]:
### bring into sqlite

Base = declarative_base()

class Day_sell(Base):
    __tablename__ = "Day_Sell"
    Index = Column(Integer, primary_key = True)
    Date = Column(String)
    zn = Column(Integer)
    sb = Column (Integer)
    tax = Column(Integer)
    marza = Column(Integer)   
    
    def __init__(self, name):
        
        self.name = name
        
engine = create_engine("sqlite:///day_sell.sqlite")
conn = engine.connect()
Base.metadata.create_all(engine)

day_sell_df.to_sql("Day_Sell", conn, if_exists = 'replace')
    
    
    

In [8]:
## Check
engine.table_names()

['Day Sell', 'Day_Sell']

In [12]:
### Changed column headers, don't need to run again 

# query1 = "ALTER TABLE Day_Sell RENAME COLUMN Date TO 'Date of Sale'"
# query2 = "ALTER TABLE Day_Sell RENAME COLUMN zn TO 'Net Purchase per day (PLN)'"
# query3 = "ALTER TABLE Day_Sell RENAME COLUMN sb TO 'Gross Sale per day (PLN)'"
# query4 = "ALTER TABLE Day_Sell RENAME COLUMN tax TO 'Tax of Sale per day (PLN)'"
# query5 = "ALTER TABLE Day_Sell RENAME COLUMN marza TO 'Margin per day (PLN)'"


# conn.execute(query2)
# conn.execute(query3)
# conn.execute(query4)
# conn.execute(query5)

<sqlalchemy.engine.result.ResultProxy at 0x28103cb62e8>

In [13]:
from sqlalchemy import inspect
inspector = inspect(engine)
for table in inspector.get_table_names():
    columns = inspector.get_columns(table)
    print(table)
    for column in columns:
        print(column["name"], column["type"])
    print('------')

Day Sell
Index BIGINT
Date of Sale TEXT
Net Purchase per day (PLN) FLOAT
Gross Sale per day (PLN) FLOAT
Tax of Sale per day (PLN) FLOAT
Margin per day (PLN) TEXT
------
Day_Sell
Index BIGINT
Date DATETIME
Net Purchase per day (PLN) FLOAT
Gross Sale per day (PLN) FLOAT
Tax of Sale per day (PLN) FLOAT
Margin per day (PLN) TEXT
------


In [11]:
# query check


# from sqlalchemy.orm import Session 
session = Session(bind=engine)

result = session.query(Day_sell)



for item in result: 
    print(item)
    

    

<__main__.Day_sell object at 0x0000028103D77438>
<__main__.Day_sell object at 0x0000028103D774A8>
<__main__.Day_sell object at 0x0000028103D77518>
<__main__.Day_sell object at 0x0000028103D77588>
<__main__.Day_sell object at 0x0000028103D775F8>
<__main__.Day_sell object at 0x0000028103D77668>
<__main__.Day_sell object at 0x0000028103D776D8>
<__main__.Day_sell object at 0x0000028103D77748>
<__main__.Day_sell object at 0x0000028103D777B8>
<__main__.Day_sell object at 0x0000028103D77828>
<__main__.Day_sell object at 0x0000028103D77898>
<__main__.Day_sell object at 0x0000028103D77908>
<__main__.Day_sell object at 0x0000028103D77978>
<__main__.Day_sell object at 0x0000028103D779E8>
<__main__.Day_sell object at 0x0000028103D77A58>
<__main__.Day_sell object at 0x0000028103D77AC8>
<__main__.Day_sell object at 0x0000028103D77B38>
<__main__.Day_sell object at 0x0000028103D77BA8>
<__main__.Day_sell object at 0x0000028103D77C18>
<__main__.Day_sell object at 0x0000028103D77C88>
<__main__.Day_sell o