In [1]:
import pandas as pd
import json

# Read raw JSON
with open('users.json', 'r') as f:
    raw_data = f.read().strip().splitlines()

# Parse each JSON object
data = [json.loads(line) for line in raw_data]

# Convert to DataFrame
users_df = pd.json_normalize(data)

print(users_df.head())


   active      role signUpSource state                  _id.$oid  \
0    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
1    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
2    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
3    True  consumer        Email    WI  5ff1e1eacfcf6c399c274ae6   
4    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   

   createdDate.$date  lastLogin.$date  
0      1609687444800     1.609688e+12  
1      1609687444800     1.609688e+12  
2      1609687444800     1.609688e+12  
3      1609687530554     1.609688e+12  
4      1609687444800     1.609688e+12  


In [2]:
# Read raw JSON
with open('brands.json', 'r') as f:
    raw_data = f.read().strip().splitlines()

# Parse each JSON object
data = [json.loads(line) for line in raw_data]

# Convert to DataFrame
brands_df = pd.json_normalize(data)

print(brands_df.head())


        barcode        category      categoryCode                       name  \
0  511111019862          Baking            BAKING  test brand @1612366101024   
1  511111519928       Beverages         BEVERAGES                  Starbucks   
2  511111819905          Baking            BAKING  test brand @1612366146176   
3  511111519874          Baking            BAKING  test brand @1612366146051   
4  511111319917  Candy & Sweets  CANDY_AND_SWEETS  test brand @1612366146827   

  topBrand                  _id.$oid              cpg.$id.$oid cpg.$ref  \
0    False  601ac115be37ce2ead437551  601ac114be37ce2ead437550     Cogs   
1    False  601c5460be37ce2ead43755f  5332f5fbe4b03c9a25efd0ba     Cogs   
2    False  601ac142be37ce2ead43755d  601ac142be37ce2ead437559     Cogs   
3    False  601ac142be37ce2ead43755a  601ac142be37ce2ead437559     Cogs   
4    False  601ac142be37ce2ead43755e  5332fa12e4b03c9a25efd1e7     Cogs   

                       brandCode  
0                            NaN 

In [3]:
# Read raw JSON
with open('receipts.json', 'r') as f:
    raw_data = f.read().strip().splitlines()

# Parse each JSON object
data = [json.loads(line) for line in raw_data]

# Convert to DataFrame
receipts_df = pd.json_normalize(data)

print(receipts_df.head())


   bonusPointsEarned                            bonusPointsEarnedReason  \
0              500.0  Receipt number 2 completed, bonus point schedu...   
1              150.0  Receipt number 5 completed, bonus point schedu...   
2                5.0                         All-receipts receipt bonus   
3                5.0                         All-receipts receipt bonus   
4                5.0                         All-receipts receipt bonus   

  pointsEarned  purchasedItemCount  \
0        500.0                 5.0   
1        150.0                 2.0   
2            5                 1.0   
3          5.0                 4.0   
4          5.0                 2.0   

                              rewardsReceiptItemList rewardsReceiptStatus  \
0  [{'barcode': '4011', 'description': 'ITEM NOT ...             FINISHED   
1  [{'barcode': '4011', 'description': 'ITEM NOT ...             FINISHED   
2  [{'needsFetchReview': False, 'partnerItemId': ...             REJECTED   
3  [{'barcod

In [4]:
print(brands_df.info())
print(receipts_df.info())
print(users_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1167 entries, 0 to 1166
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   barcode       1167 non-null   object
 1   category      1012 non-null   object
 2   categoryCode  517 non-null    object
 3   name          1167 non-null   object
 4   topBrand      555 non-null    object
 5   _id.$oid      1167 non-null   object
 6   cpg.$id.$oid  1167 non-null   object
 7   cpg.$ref      1167 non-null   object
 8   brandCode     933 non-null    object
dtypes: object(9)
memory usage: 82.2+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1119 entries, 0 to 1118
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   bonusPointsEarned        544 non-null    float64
 1   bonusPointsEarnedReason  544 non-null    object 
 2   pointsEarned             609 non-null    object 
 3   purc

In [5]:
def clean_users(users_df):
    # convert the date columns to datetime for users_df
    users_df['created_date'] = pd.to_datetime(users_df['createdDate.$date'])
    users_df['last_login'] = pd.to_datetime(users_df['lastLogin.$date'])

    # Rename columns
    users_df = users_df.rename(columns={
        '_id.$oid': 'user_id',
        'active': 'active',
        'role': 'role',
        'signUpSource': 'sign_up_source'
    })
    # Drop columns
    users_df = users_df.drop(columns=['createdDate.$date', 'lastLogin.$date']).drop_duplicates().reset_index(drop=True)

    return users_df

In [6]:
users_df_cleaned = clean_users(users_df)
print(users_df_cleaned.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212 entries, 0 to 211
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   active          212 non-null    bool          
 1   role            212 non-null    object        
 2   sign_up_source  207 non-null    object        
 3   state           206 non-null    object        
 4   user_id         212 non-null    object        
 5   created_date    212 non-null    datetime64[ns]
 6   last_login      172 non-null    datetime64[ns]
dtypes: bool(1), datetime64[ns](2), object(4)
memory usage: 10.3+ KB
None


In [7]:
def create_cpg_df(brands_df):
    # creata new dataframe with cpg id and cpg reference
    cpg_df = brands_df[['cpg.$id.$oid', 'cpg.$ref']].drop_duplicates().reset_index(drop=True)
    cpg_df = cpg_df.rename(columns={'cpg.$id.$oid': 'cpg_id', 'cpg.$ref': 'cpg_reference'})
    return cpg_df

cpg_df = create_cpg_df(brands_df)
print(cpg_df.head())
print(cpg_df.info())

                     cpg_id cpg_reference
0  601ac114be37ce2ead437550          Cogs
1  5332f5fbe4b03c9a25efd0ba          Cogs
2  601ac142be37ce2ead437559          Cogs
3  5332fa12e4b03c9a25efd1e7          Cogs
4  559c2234e4b06aca36af13c6          Cogs
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   cpg_id         204 non-null    object
 1   cpg_reference  204 non-null    object
dtypes: object(2)
memory usage: 3.3+ KB
None


In [8]:
def create_category_df(brands_df):
    # create new dataframe with category id and category
    category_df = brands_df[['category', 'categoryCode', 'cpg.$id.$oid']].drop_duplicates().reset_index(drop=True)
    category_df = category_df.rename(columns={'cpg.$id.$oid': 'cpg_id', 'category': 'category_name', 'categoryCode': 'category_code'})
    
    return category_df

category_df = create_category_df(brands_df)
print(category_df.head())
print(category_df.info())

         category_name     category_code                    cpg_id
0               Baking            BAKING  601ac114be37ce2ead437550
1            Beverages         BEVERAGES  5332f5fbe4b03c9a25efd0ba
2               Baking            BAKING  601ac142be37ce2ead437559
3       Candy & Sweets  CANDY_AND_SWEETS  5332fa12e4b03c9a25efd1e7
4  Condiments & Sauces               NaN  559c2234e4b06aca36af13c6
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   category_name  232 non-null    object
 1   category_code  167 non-null    object
 2   cpg_id         259 non-null    object
dtypes: object(3)
memory usage: 6.2+ KB
None


In [9]:
def clean_brands(brands_df):
    # Rename columns
    brands_df = brands_df.rename(columns={
        '_id.$oid': 'brand_uuid',
        'barcode': 'barcode',
        'brandCode': 'brand_code',
        'name': 'brand_name',
        'categoryCode': 'category_code',
        'topBrand': 'top_brand',
    })
    # Drop columns
    brands_df = brands_df.drop(columns=['cpg.$id.$oid', 'cpg.$ref', 'category']).drop_duplicates().reset_index(drop=True)

    return brands_df

brands_df_cleaned = clean_brands(brands_df)
print(brands_df_cleaned.head())
print(brands_df_cleaned.info())

        barcode     category_code                 brand_name top_brand  \
0  511111019862            BAKING  test brand @1612366101024     False   
1  511111519928         BEVERAGES                  Starbucks     False   
2  511111819905            BAKING  test brand @1612366146176     False   
3  511111519874            BAKING  test brand @1612366146051     False   
4  511111319917  CANDY_AND_SWEETS  test brand @1612366146827     False   

                 brand_uuid                     brand_code  
0  601ac115be37ce2ead437551                            NaN  
1  601c5460be37ce2ead43755f                      STARBUCKS  
2  601ac142be37ce2ead43755d  TEST BRANDCODE @1612366146176  
3  601ac142be37ce2ead43755a  TEST BRANDCODE @1612366146051  
4  601ac142be37ce2ead43755e  TEST BRANDCODE @1612366146827  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1167 entries, 0 to 1166
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----

In [10]:
def create_products_table(receipts_df, brands_df):
    # create new dataframe with barcode of receipts as product id, description,
    # metabrite_campaign_id, partner_item_id, rewards_group, rewards_product_partner_id,
    # item_price, competitive_product, competitor_rewards_group and brand_id
    products_df = receipts_df[['rewardsReceiptItemList']].explode('rewardsReceiptItemList')
    products_df = products_df.dropna(subset=['rewardsReceiptItemList']).reset_index(drop=True)
    products_df = products_df.rename(columns={'rewardsReceiptItemList': 'product'})
    products_df = pd.json_normalize(products_df['product'])
    products_df = products_df.rename(columns={
        'barcode': 'product_id',
        'description': 'description',
        'itemPrice': 'product_price',
        'metabriteCampaignId': 'metabrite_campaign_id',
        'rewardsGroup': 'rewards_group',
        'rewardsProductPartnerId': 'rewards_product_partner_id',
        'brandCode': 'brand_code',
        'originalMetaBriteItemPrice': 'original_metabrite_item_price',
        'originalMetaBriteBarcode': 'original_metabrite_barcode',
        'originalMetaBriteDescription': 'original_metabrite_description',
        'competitorRewardsGroup': 'competitor_rewards_group',
        'competitiveProduct': 'competitive_product',
    })
    # merge with brands_df to get brand_id and drop brand_code
    products_df = products_df.merge(brands_df, on='brand_code', how='left')
    # select columns
    products_df = products_df[['product_id',
                               'brand_uuid',
                                'description',
                                'metabrite_campaign_id',
                                'original_metabrite_barcode',
                                'original_metabrite_description',
                                'original_metabrite_item_price',
                                'rewards_group',
                                'rewards_product_partner_id',
                                'product_price',
                                'competitive_product',
                                'competitor_rewards_group'
                                ]].drop_duplicates().reset_index(drop=True)
    return products_df

products_df = create_products_table(receipts_df, brands_df_cleaned)
print(products_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429545 entries, 0 to 429544
Data columns (total 12 columns):
 #   Column                          Non-Null Count   Dtype 
---  ------                          --------------   ----- 
 0   product_id                      82694 non-null   object
 1   brand_uuid                      428844 non-null  object
 2   description                     414101 non-null  object
 3   metabrite_campaign_id           525 non-null     object
 4   original_metabrite_barcode      11232 non-null   object
 5   original_metabrite_description  468 non-null     object
 6   original_metabrite_item_price   468 non-null     object
 7   rewards_group                   43868 non-null   object
 8   rewards_product_partner_id      59397 non-null   object
 9   product_price                   429077 non-null  object
 10  competitive_product             14399 non-null   object
 11  competitor_rewards_group        92 non-null      object
dtypes: object(12)
memory usage: 39

In [14]:
def create_receipts_table(receipts_df):
    receipts_df = receipts_df.rename(columns={
        '_id.$oid': 'receipt_uuid',
        'bonusPointsEarned': 'bonus_points_earned',
        'bonusPointsEarnedReason': 'bonus_points_reason',
        'createDate.$date': 'create_date',
        'dateScanned.$date': 'date_scanned',
        'finishedDate.$date': 'finished_date',
        'modifyDate.$date': 'modify_date',
        'pointsAwardedDate.$date': 'points_awarded_date',
        'pointsEarned': 'points_earned',
        'purchaseDate.$date': 'purchase_date',
        'purchasedItemCount': 'purchased_item_count',
        'rewardsReceiptStatus': 'rewards_receipt_status',
        'rewardsReceiptStatusReason': 'rewards_receipt_status_reason',
        'totalSpent': 'total_spent',
        'userId.$oid': 'user_id'
    })
    receipts_df['create_date'] = pd.to_datetime(receipts_df['create_date'])
    receipts_df['date_scanned'] = pd.to_datetime(receipts_df['date_scanned'])
    receipts_df['finished_date'] = pd.to_datetime(receipts_df['finished_date'])
    receipts_df['modify_date'] = pd.to_datetime(receipts_df['modify_date'])
    receipts_df['points_awarded_date'] = pd.to_datetime(receipts_df['points_awarded_date'])
    receipts_df['purchase_date'] = pd.to_datetime(receipts_df['purchase_date'])
    receipts_df = receipts_df.drop(columns=['rewardsReceiptItemList']).drop_duplicates().reset_index(drop=True)
    return receipts_df

receipts_df_cleaned = create_receipts_table(receipts_df)
print(receipts_df_cleaned.info())
print(receipts_df_cleaned.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1119 entries, 0 to 1118
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   bonus_points_earned     544 non-null    float64       
 1   bonus_points_reason     544 non-null    object        
 2   points_earned           609 non-null    object        
 3   purchased_item_count    635 non-null    float64       
 4   rewards_receipt_status  1119 non-null   object        
 5   total_spent             684 non-null    object        
 6   userId                  1119 non-null   object        
 7   receipt_uuid            1119 non-null   object        
 8   create_date             1119 non-null   datetime64[ns]
 9   date_scanned            1119 non-null   datetime64[ns]
 10  finished_date           568 non-null    datetime64[ns]
 11  modify_date             1119 non-null   datetime64[ns]
 12  points_awarded_date     537 non-null    datetime

In [55]:
import ast
def create_receipt_items_table(receipts_df):
    with open('receipts.json', 'r') as f:
        raw_data = f.read().strip()
    # Convert to JSON Lines (each line is a valid JSON object)
    json_objects = [json.loads(line) for line in raw_data.strip().split("\n") if "rewardsReceiptItemList" in line] 
    print(json_objects[0])

    # Normalize the nested `rewardsReceiptItemList`
    receipt_items_df = pd.json_normalize(
        json_objects,
        record_path=["rewardsReceiptItemList"],  # Flatten this field
        meta=["_id"],  # Keep these at the top level
    )
    # replace null barcode with itemNumber and drop itemNumber
    receipt_items_df['barcode'] = receipt_items_df['barcode'].fillna(receipt_items_df['itemNumber'])
    receipt_items_df["receipt_id"] = receipt_items_df["_id"].apply(lambda x: x["$oid"])
    # rename all camelCase columns to snake_case
    receipt_items_df = receipt_items_df.rename(columns={
        'barcode': 'product_id',
        'finalPrice': 'final_price',
        'needsFetchReview': 'needs_fetch_review',
        'partnerItemId': 'partner_item_id',
        'preventTargetGapPoints': 'prevent_target_gap_points',
        'quantityPurchased': 'quantity_purchased',
        'userFlaggedBarcode': 'user_flagged_barcode',
        'userFlaggedNewItem': 'user_flagged_new_item',
        'userFlaggedPrice': 'user_flagged_price',
        'userFlaggedQuantity': 'user_flagged_quantity',
        'needsFetchReviewReason': 'needs_fetch_review_reason',
        'pointsNotAwardedReason': 'points_not_awarded_reason',
        'userFlaggedDescription': 'user_flagged_description',
        'pointsPayerId': 'points_payer_id',
        'rewardsGroup': 'rewards_group',
        'rewardsProductPartnerId': 'rewards_product_partner_id',
        'discountedItemPrice': 'discounted_item_price',
        'originalReceiptItemText': 'original_receipt_item_text',
        'originalMetaBriteQuantityPurchased': 'original_metabrite_quantity_purchased',
        'pointsEarned': 'points_earned',
        'targetPrice': 'target_price',
        'originalFinalPrice': 'original_final_price',
        'priceAfterCoupon': 'price_after_coupon'
    })
    # select columns all those renamed earlier
    receipt_items_df = receipt_items_df[[
        'receipt_id',
        'product_id',
        'final_price',
        'needs_fetch_review',
        'partner_item_id',
        'prevent_target_gap_points',
        'quantity_purchased',
        'user_flagged_barcode',
        'user_flagged_new_item',
        'user_flagged_price',
        'user_flagged_quantity',
        'needs_fetch_review_reason',
        'points_not_awarded_reason',
        'points_payer_id',
        'rewards_group',
        'user_flagged_description',
        'rewards_product_partner_id',
        'discounted_item_price',
        'original_receipt_item_text',
        'original_metabrite_quantity_purchased',
        'points_earned',
        'target_price',
        'original_final_price',
        'price_after_coupon'
    ]].drop_duplicates().reset_index(drop=True)
    return receipt_items_df

receipt_items_df = create_receipt_items_table(receipts_df)

{'_id': {'$oid': '5ff1e1eb0a720f0523000575'}, 'bonusPointsEarned': 500, 'bonusPointsEarnedReason': 'Receipt number 2 completed, bonus point schedule DEFAULT (5cefdcacf3693e0b50e83a36)', 'createDate': {'$date': 1609687531000}, 'dateScanned': {'$date': 1609687531000}, 'finishedDate': {'$date': 1609687531000}, 'modifyDate': {'$date': 1609687536000}, 'pointsAwardedDate': {'$date': 1609687531000}, 'pointsEarned': '500.0', 'purchaseDate': {'$date': 1609632000000}, 'purchasedItemCount': 5, 'rewardsReceiptItemList': [{'barcode': '4011', 'description': 'ITEM NOT FOUND', 'finalPrice': '26.00', 'itemPrice': '26.00', 'needsFetchReview': False, 'partnerItemId': '1', 'preventTargetGapPoints': True, 'quantityPurchased': 5, 'userFlaggedBarcode': '4011', 'userFlaggedNewItem': True, 'userFlaggedPrice': '26.00', 'userFlaggedQuantity': 5}], 'rewardsReceiptStatus': 'FINISHED', 'totalSpent': '26.00', 'userId': '5ff1e1eacfcf6c399c274ae6'}


In [56]:
print(receipt_items_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6941 entries, 0 to 6940
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   receipt_id                             6941 non-null   object 
 1   product_id                             3090 non-null   object 
 2   final_price                            6767 non-null   object 
 3   needs_fetch_review                     813 non-null    object 
 4   partner_item_id                        6941 non-null   object 
 5   prevent_target_gap_points              358 non-null    object 
 6   quantity_purchased                     6767 non-null   float64
 7   user_flagged_barcode                   337 non-null    object 
 8   user_flagged_new_item                  323 non-null    object 
 9   user_flagged_price                     299 non-null    object 
 10  user_flagged_quantity                  299 non-null    float64
 11  need

In [23]:
print(users_df.head())

   index  active      role sign_up_source state                   user_id  \
0      0    True  consumer          Email    WI  5ff1e194b6a9d73a3a9f1052   
1      3    True  consumer          Email    WI  5ff1e1eacfcf6c399c274ae6   
2      6    True  consumer          Email    WI  5ff1e1e8cfcf6c399c274ad9   
3      7    True  consumer          Email    WI  5ff1e1b7cfcf6c399c274a5a   
4      9    True  consumer          Email    WI  5ff1e1f1cfcf6c399c274b0b   

                   created_date                    last_login  
0 1970-01-01 00:26:49.687444800 1970-01-01 00:26:49.687537858  
1 1970-01-01 00:26:49.687530554 1970-01-01 00:26:49.687530597  
2 1970-01-01 00:26:49.687528354 1970-01-01 00:26:49.687528392  
3 1970-01-01 00:26:49.687479626 1970-01-01 00:26:49.687479665  
4 1970-01-01 00:26:49.687537564 1970-01-01 00:26:49.687537599  


In [None]:
# convert the date columns to datetime for users_df
users_df['createdDate'] = pd.to_datetime(users_df['createdDate'])
users_df['lastLogin'] = pd.to_datetime(users_df['lastLogin'])

# convert the date columns to datetime for receipts_df
receipts_df['createDate'] = pd.to_datetime(receipts_df['createDate'])
receipts_df['dateScanned'] = pd.to_datetime(receipts_df['dateScanned'])
receipts_df['finishedDate'] = pd.to_datetime(receipts_df['finishedDate'])
receipts_df['modifyDate'] = pd.to_datetime(receipts_df['modifyDate'])
receipts_df['pointsAwardedDate'] = pd.to_datetime(receipts_df['pointsAwardedDate'])
receipts_df['purchaseDate'] = pd.to_datetime(receipts_df['purchaseDate'])



In [61]:
import sqlite3

# Create a new SQLite database called fetch_rewards.db
conn = sqlite3.connect('fetch_rewards.db')

# Create a cursor object
cursor = conn.cursor()
cursor.execute("DROP TABLE IF EXISTS users")
cursor.execute("""
    CREATE TABLE users (
        user_id TEXT PRIMARY KEY,
        active INTEGER,
        role TEXT,
        sign_up_source TEXT,
        created_date TIMESTAMP,
        last_login TIMESTAMP,
        state TEXT
    );
""")

# insert data into the users table from a DataFrame
cursor.execute("""INSERT INTO users (user_id, active, role, sign_up_source, created_date, last_login, state) VALUES (?, ?, ?, ?, ?, ?, ?)""",
               (users_df_cleaned['user_id'], users_df_cleaned['active'], users_df_cleaned['role'], users_df_cleaned['sign_up_source'], users_df_cleaned['created_date'], users_df_cleaned['last_login'], users_df_cleaned['state']))
conn.commit()
cursor.execute("SELECT * FROM users limit 10")
print(cursor.fetchall())
conn.close()



ProgrammingError: Error binding parameter 1: type 'Series' is not supported

In [62]:
users_df_cleaned.to_sql("users", conn, if_exists="append", index=False, method="multi")

212

In [64]:

cursor.execute("SELECT * FROM users limit 10")
print(cursor.fetchall())

[('5ff1e194b6a9d73a3a9f1052', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687444', '1970-01-01 00:26:49.687537', 'WI'), ('5ff1e1eacfcf6c399c274ae6', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687530', '1970-01-01 00:26:49.687530', 'WI'), ('5ff1e1e8cfcf6c399c274ad9', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687528', '1970-01-01 00:26:49.687528', 'WI'), ('5ff1e1b7cfcf6c399c274a5a', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687479', '1970-01-01 00:26:49.687479', 'WI'), ('5ff1e1f1cfcf6c399c274b0b', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687537', '1970-01-01 00:26:49.687537', 'WI'), ('5ff1e1e4cfcf6c399c274ac3', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687524', '1970-01-01 00:26:49.687524', 'WI'), ('5ff1e1b4cfcf6c399c274a54', 1, 'consumer', 'Email', '1970-01-01 00:26:49.687476', '1970-01-01 00:26:49.687476', 'WI'), ('5ff370c562fde912123a5e0e', 1, 'consumer', 'Email', '1970-01-01 00:26:49.789637', '1970-01-01 00:26:49.789850', 'WI'), ('5ff36d0362fde912123a5535', 1, 'consum

In [66]:
from sqlalchemy import create_engine
# Create a connection to the database
engine = create_engine("sqlite:///fetch_rewards.db")
# Write the DataFrame to the SQLite database
users_df_cleaned.to_sql("users", engine, if_exists="replace", index=False)

212

In [68]:
from sqlalchemy import text

with engine.connect() as connection:
        result = connection.execute(text("SELECT * FROM users"))
        print(result.fetchall())

[(1, 'consumer', 'Email', 'WI', '5ff1e194b6a9d73a3a9f1052', '1970-01-01 00:26:49.687444', '1970-01-01 00:26:49.687537'), (1, 'consumer', 'Email', 'WI', '5ff1e1eacfcf6c399c274ae6', '1970-01-01 00:26:49.687530', '1970-01-01 00:26:49.687530'), (1, 'consumer', 'Email', 'WI', '5ff1e1e8cfcf6c399c274ad9', '1970-01-01 00:26:49.687528', '1970-01-01 00:26:49.687528'), (1, 'consumer', 'Email', 'WI', '5ff1e1b7cfcf6c399c274a5a', '1970-01-01 00:26:49.687479', '1970-01-01 00:26:49.687479'), (1, 'consumer', 'Email', 'WI', '5ff1e1f1cfcf6c399c274b0b', '1970-01-01 00:26:49.687537', '1970-01-01 00:26:49.687537'), (1, 'consumer', 'Email', 'WI', '5ff1e1e4cfcf6c399c274ac3', '1970-01-01 00:26:49.687524', '1970-01-01 00:26:49.687524'), (1, 'consumer', 'Email', 'WI', '5ff1e1b4cfcf6c399c274a54', '1970-01-01 00:26:49.687476', '1970-01-01 00:26:49.687476'), (1, 'consumer', 'Email', 'WI', '5ff370c562fde912123a5e0e', '1970-01-01 00:26:49.789637', '1970-01-01 00:26:49.789850'), (1, 'consumer', 'Email', 'WI', '5ff36d0