In [21]:
import pandas as pd
import datetime

In [30]:
# Read the raw transactions CSV; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='Data/data.csv')
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,transaction_id,user_id,product_id,amount,timestamp
0,1,10,101,49.99,2023-06-01 12:45:00
1,2,11,101,49.99,2023-06-01 13:20:00
2,3,10,102,29.99,2023-06-02 09:00:00
3,4,12,103,79.99,2023-06-02 17:15:00
4,5,10,101,49.99,2023-06-03 08:30:00


### Parse the timestamp column to datetime


In [31]:
# Replace the 'timestamp' column with its datetime-parsed equivalent.
df['timestamp'] = df['timestamp'].pipe(pd.to_datetime)
# Preview the first five rows after conversion.
df.head(n=5)

Unnamed: 0,transaction_id,user_id,product_id,amount,timestamp
0,1,10,101,49.99,2023-06-01 12:45:00
1,2,11,101,49.99,2023-06-01 13:20:00
2,3,10,102,29.99,2023-06-02 09:00:00
3,4,12,103,79.99,2023-06-02 17:15:00
4,5,10,101,49.99,2023-06-03 08:30:00


### Create a column with the day-of-week name

In [32]:
# Weekday name (e.g. "Thursday") for each transaction timestamp.
weekday_names = df['timestamp'].dt.day_name()
df['day_of_week'] = weekday_names
df

Unnamed: 0,transaction_id,user_id,product_id,amount,timestamp,day_of_week
0,1,10,101,49.99,2023-06-01 12:45:00,Thursday
1,2,11,101,49.99,2023-06-01 13:20:00,Thursday
2,3,10,102,29.99,2023-06-02 09:00:00,Friday
3,4,12,103,79.99,2023-06-02 17:15:00,Friday
4,5,10,101,49.99,2023-06-03 08:30:00,Saturday
5,6,11,104,59.99,2023-06-04 10:45:00,Sunday


### Separate the time from the timestamp

In [33]:
# Clock-time part of each timestamp.
# NOTE: despite the column name, this holds the full time of day (e.g. 12:45:00), not only the hour.
time_of_day = df['timestamp'].dt.time
df['hours_of_day'] = time_of_day
df

Unnamed: 0,transaction_id,user_id,product_id,amount,timestamp,day_of_week,hours_of_day
0,1,10,101,49.99,2023-06-01 12:45:00,Thursday,12:45:00
1,2,11,101,49.99,2023-06-01 13:20:00,Thursday,13:20:00
2,3,10,102,29.99,2023-06-02 09:00:00,Friday,09:00:00
3,4,12,103,79.99,2023-06-02 17:15:00,Friday,17:15:00
4,5,10,101,49.99,2023-06-03 08:30:00,Saturday,08:30:00
5,6,11,104,59.99,2023-06-04 10:45:00,Sunday,10:45:00


### Total amount spent, average transaction value, and transaction count per user

In [37]:
# One row per user: total spend, mean transaction amount, and number of transactions.
per_user = df.groupby('user_id', as_index=False).agg(
    total_spent=('amount', 'sum'),
    avg_transaction_value=('amount', 'mean'),
    transaction_count=('amount', 'count'),
)
# Order users from highest to lowest total spend.
new_df = per_user.sort_values(by='total_spent', ascending=False)

new_df

Unnamed: 0,user_id,total_spent,avg_transaction_value,transaction_count
0,10,129.97,43.323333,3
1,11,109.98,54.99,2
2,12,79.99,79.99,1


### Load to db


In [None]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from dotenv import load_dotenv
import os

# Load connection settings from a local .env file into the process environment.
load_dotenv()

user = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the connection URL as the literal text "None".
db_settings = {
    "DB_USER": user,
    "DB_PASSWORD": password,
    "DB_HOST": host,
    "DB_PORT": port,
    "DB_NAME": db_name,
}
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in environment/.env: " + ", ".join(missing_settings)
    )

# Build the URL from its components rather than an f-string so that special
# characters in the credentials (e.g. '@', ':', '/') cannot corrupt it.
connection_url = URL.create(
    drivername="postgresql",
    username=user,
    password=password,
    host=host,
    port=int(port),
    database=db_name,
)

engine = create_engine(connection_url)

# Write the per-user aggregates (new_df), not the raw transaction rows (df),
# into the 'user_stats' table. Any existing table of that name is replaced.
new_df.to_sql('user_stats', engine, if_exists='replace', index=False)