# BULKING NOTEBOOK 

In [1]:
import numpy as np
import pandas as pd
import sqlite3
import datetime

### STEPS FOR DATABASING
1. read in data
1.1 connect to db
2. create data base and check that it does not exist
2.1 create raw table
3. clean and transform data function
3.1 data times and ints and no strings in int catagories
4. create silver/transformed table
5. move data to silver table.

In [28]:
path = '/Users/geraldlittlejr/Documents/vs_files/bulking/bulking_file.txt'
df = pd.read_csv(path)
df

Unnamed: 0,Date,weight(lbs),workout_days,missed_meals,protein,creatine
0,Nov202023,200.2,4.0,3,1.0,1.0
1,Nov272023,202.2,5.0,1,1.0,1.0
2,Dec042023,206.6,4.5,0,0.0,1.0
3,Dec112023,207.6,4.5,2,1.0,1.0
4,Dec182023,208.6,2.0,6,0.0,0.0
5,Dec262023,204.8,1.0,7,0.0,0.0
6,JAN022024,207.5,3.0,5,1.0,1.0
7,Jan092024,204.8,3.0,5,1.0,1.0
8,JAN162024,206.5,1.0,3(?),1.0,1.0
9,Jan222024,211.4,5.0,1,1.0,1.0


In [34]:
# Create db instance, connection and cursor to db instance
name = "fitness_tracker_database.db"
con = sqlite3.connect(name)
cur = con.cursor()
bronze_table = "bronze_table"
silver_table = "silver_table"
gold_table = "gold_table"

In [35]:
# Create table and insert data from pandas dataframe
try:
    df.to_sql(bronze_table, 
              con, 
              if_exists='replace', 
              index=False,
              dtype={
                  'Date': 'TEXT',
                  'weight(lbs)': 'TEXT',
                  'workout_days': 'TEXT',
                  'missed_meals': 'TEXT',
                  'protein': 'TEXT',
                  'creatine': 'TEXT',
})

except Exception as e:
    print(f'There was an error with table creation: ', e)
else:
    print(f'Table created successfully')
    
    # CHECC THAT A TABLE WAS CREATED
    res = cur.execute(f"SELECT name FROM sqlite_master").fetchall()
    print(res)

Table created successfully
[('bronze_table',)]


In [33]:
cur.execute(f"SELECT * FROM bronze_table")
rows = cur.fetchall()
for row in rows:
    print(row)


('Nov202023', '200.2', '4.0', '3', '1.0', '1.0')
('Nov272023', '202.2', '5.0', '1', '1.0', '1.0')
('Dec042023', '206.6', '4.5', '0', '0.0', '1.0')
('Dec112023', '207.6', '4.5', '2', '1.0', '1.0')
('Dec182023', '208.6', '2.0', '6', '0.0', '0.0')
('Dec262023', '204.8', '1.0', '7', '0.0', '0.0')
('JAN022024', '207.5', '3.0', '5', '1.0', '1.0')
('Jan092024', '204.8', '3.0', '5', '1.0', '1.0')
('JAN162024', '206.5', '1.0', '3(?)', '1.0', '1.0')
('Jan222024', '211.4', '5.0', '1', '1.0', '1.0')
('Jan292024', '210.4', '1.0', '3', '0.0', '0.0')
('Feb05', '210', '1.0', '4', '1.0', '1.0')
('Feb122024', '212.6', '4.0', '2', '1.0', '1.0')
('Feb192024', '212.6', '1.0', '6', '0.0', '0.0')
('Feb262024', '211.8', '1.0', '2', '0.0', '0.0')
('Mar032024', '215.6(?)', '0.0', '3', '0.0', '0.0')
('Mar112024', '216.8', '0.0', '0', '0.0', '0.0')
('Mar252024', '215', '0.0', '2', '0.0', '0.0')
('1APR2024', '215.6', '1.0', '3', '0.0', '0.0')
('7APR2024', '217.8', '0.0', '4', '0.0', '0.0')
('14APR2024', '217.5', '

In [32]:
# CHECC FOR NON EXISTANT TABLE
res = cur.execute(f"SELECT name FROM sqlite_master WHERE name='foo'")
res.fetchone() is None

True

In [49]:
# Table Transformations
def clean_numerical(col):
    import re
    # Remove any non-numeric characters using regular expressions
    return re.sub(r'[^\d.]+', '', col)

def add_default_year(date_str, default_year=2024):
    if len(date_str) == 5:
        return date_str + default_year
    return date_str

def date_transform(date):
    import re

    for str in date:
        if isinstance(date, pd.Timestamp):
            continue
    match = re.search(r'([a-zA-Z]{3}\d{2})', date)
    if match:
        add_default_year(date)

    elif re.search(r'\d+[a-zA-Z]{3}\d{4}', date):
            date = pd.to_datetime(date, errors='coerce', format='%d%b%Y').strftime('%d%b%Y')

    return date_str

def table_transform(df):
    df['Date'] = df['Date'].apply(lambda x: add_default_year(x))
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce', format='%b%d%Y').dt.strftime('%b%d%Y')

    df['weight(lbs)'] = df['weight(lbs)'].astype('str').apply(clean_numerical)
    df['weight(lbs)'] = df['weight(lbs)'].astype('float').round(1)

    df['workout_days'] = df['workout_days'].astype('int')  

    df['missed_meals'] = df['missed_meals'].astype('str').apply(clean_numerical)
    df['missed_meals'] = df['missed_meals'].astype('int')

    df['protein'] = df['protein'].astype('str').apply(clean_numerical)
    df['protein'] = df['protein'].astype('int')

    df['creatine'] = df['creatine'].astype('str').apply(clean_numerical)
    df['creatine'] = df['creatine'].astype('int')

    return df

In [48]:
date = '1jan2024'
pd.to_datetime(date, errors='coerce', format='%d%b%Y').strftime('%b%d%Y')

'Jan012024'

In [42]:
# Read data from the database and transform it
try:
    silver_df = pd.read_sql_query(f"SELECT * FROM {bronze_table}", con, parse_dates={'Date': '%b%d%Y'})
    print(silver_df)
    transform_df = table_transform(silver_df)
    
except AttributeError as e:
    print('There was an error with the transformation function:', e)
except sqlite3.OperationalError as e:
    print('There was an error with the SQLite operation:', e)
except Exception as e:
    print('An unexpected error occurred:', e)
finally:
    print(silver_table)

         Date weight(lbs) workout_days missed_meals protein creatine
0  2023-11-20       200.2          4.0            3     1.0      1.0
1  2023-11-27       202.2          5.0            1     1.0      1.0
2  2023-12-04       206.6          4.5            0     0.0      1.0
3  2023-12-11       207.6          4.5            2     1.0      1.0
4  2023-12-18       208.6          2.0            6     0.0      0.0
5  2023-12-26       204.8          1.0            7     0.0      0.0
6  2024-01-02       207.5          3.0            5     1.0      1.0
7  2024-01-09       204.8          3.0            5     1.0      1.0
8  2024-01-16       206.5          1.0         3(?)     1.0      1.0
9  2024-01-22       211.4          5.0            1     1.0      1.0
10 2024-01-29       210.4          1.0            3     0.0      0.0
11        NaT         210          1.0            4     1.0      1.0
12 2024-02-12       212.6          4.0            2     1.0      1.0
13 2024-02-19       212.6         

In [50]:
dates = silver_df['Date'][10:20].copy()
dates = dates.apply(date_transform)
print(dates)

TypeError: expected string or bytes-like object