In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from sqlalchemy import create_engine, Column, String, Integer, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

In [3]:
# Read the connection string from the environment so that database
# credentials are never stored in (or committed with) the notebook, e.g.
#   export DB_URL="postgresql://<user>:<password>@<host>:5432/<database>"
DB_URL = os.environ.get("DB_URL")
if not DB_URL:
    # Fail early with an actionable message instead of letting create_engine
    # raise a less obvious error on a missing/empty URL.
    raise RuntimeError(
        "Set the DB_URL environment variable to the PostgreSQL connection "
        "string before running this notebook."
    )

engine = create_engine(DB_URL)

# Create a session to interact with the database
Session = sessionmaker(bind=engine)
session = Session()

In [4]:
# Create the shared declarative base class; the ORM models defined in this
# notebook (User, TaxDetails) inherit from it.
Base = declarative_base()


# Define the User model
class User(Base):
    """ORM model mapped to the ``users`` table."""

    __tablename__ = "users"

    # String primary key; the notebook joins it against TaxDetails.user_id.
    id = Column(String, primary_key=True)
    name = Column(String)
    # NOTE(review): the sample rows previewed in this notebook show both
    # hashed-looking and plain-looking values here -- confirm how passwords
    # are stored, and avoid displaying this column.
    password = Column(String)
    # NOTE(review): previewed rows contain both "Male" and "male" -- casing
    # looks unnormalised; confirm before grouping on this column.
    gender = Column(String)
    date_of_birth = Column(DateTime)


# Define the TaxDetails model
class TaxDetails(Base):
    """ORM model mapped to the ``tax_details`` table."""

    __tablename__ = "tax_details"

    # String primary key of the tax record.
    tax_id = Column(String, primary_key=True)
    # Used by the notebook as the join key against User.id; no ForeignKey is
    # declared here -- presumably it references users.id (TODO confirm).
    user_id = Column(String)
    year = Column(Integer)
    income = Column(Integer)
    taxable_income = Column(Integer)
    location = Column(String)
    tax_amount = Column(Integer)

In [5]:
# Load every row of the users table as ORM objects, then build a dataframe
# from each object's attribute dictionary (this also carries SQLAlchemy's
# internal `_sa_instance_state` entry into the frame).
users = session.query(User).all()
users_df = pd.DataFrame([vars(user_row) for user_row in users])

In [6]:
# Load every row of the tax_details table as ORM objects, then build a
# dataframe from each object's attribute dictionary (this also carries
# SQLAlchemy's internal `_sa_instance_state` entry into the frame).
tax_details = session.query(TaxDetails).all()
tax_details_df = pd.DataFrame([vars(tax_row) for tax_row in tax_details])

In [7]:
# Preview the first rows of the users frame.
# NOTE(review): the rendered preview includes the name and password columns --
# clear or redact this output before sharing the notebook.
users_df.head()

Unnamed: 0,_sa_instance_state,id,gender,date_of_birth,name,password
0,<sqlalchemy.orm.state.InstanceState object at ...,1,Male,1990-01-01,John Doe,password123
1,<sqlalchemy.orm.state.InstanceState object at ...,0001,male,1998-01-01,Dhrubo Kamal,$2b$12$H13QTJdzXESgvWcwQ3tS.uoXe7e0p6TGUEV/YTq...
2,<sqlalchemy.orm.state.InstanceState object at ...,00011,male,1998-01-01,Dhrubo Kamal,$2b$12$v7dQTrDddTmdXe7Sz5Z/H.Doo.iBmaifi.UDA64...
3,<sqlalchemy.orm.state.InstanceState object at ...,6274836f-d459-4536-a85d-b6692bd341f0,male,1985-01-01,User Name,Password
4,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,male,1982-01-01,User Name,Password


In [8]:
# Preview the first rows of the tax details frame.
tax_details_df.head()

Unnamed: 0,_sa_instance_state,user_id,year,taxable_income,tax_amount,tax_id,income,location
0,<sqlalchemy.orm.state.InstanceState object at ...,1,2023,40000,10000,1,50000,New York
1,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2015,551912,57786,a50a6cf7-efd5-4f0f-b177-c9a8b99ce107,901912,non_city
2,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2016,514972,52245,a1c492a6-5df9-42ea-a2f8-010f3c6add98,864972,non_city
3,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2017,552304,57845,04b1c0ba-6966-46c8-8c3b-e57d99a124df,902304,non_city
4,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2018,511611,51741,83ea4070-60b2-4c30-ab3d-6dfd931c41a5,861611,non_city


In [9]:
# Inner-join each user with their tax records: users.id == tax_details.user_id.
# Columns present in both frames get the default _x (users) / _y (tax) suffixes.
df = users_df.merge(tax_details_df, how="inner", left_on="id", right_on="user_id")

In [10]:
# Display the merged frame (pandas elides the middle rows in the rendered output).
# NOTE(review): the rendered output still contains the name and password
# columns -- clear or redact it before sharing the notebook.
df

Unnamed: 0,_sa_instance_state_x,id,gender,date_of_birth,name,password,_sa_instance_state_y,user_id,year,taxable_income,tax_amount,tax_id,income,location
0,<sqlalchemy.orm.state.InstanceState object at ...,1,Male,1990-01-01,John Doe,password123,<sqlalchemy.orm.state.InstanceState object at ...,1,2023,40000,10000,1,50000,New York
1,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,male,1982-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2015,551912,57786,a50a6cf7-efd5-4f0f-b177-c9a8b99ce107,901912,non_city
2,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,male,1982-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2016,514972,52245,a1c492a6-5df9-42ea-a2f8-010f3c6add98,864972,non_city
3,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,male,1982-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2017,552304,57845,04b1c0ba-6966-46c8-8c3b-e57d99a124df,902304,non_city
4,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,male,1982-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,52ee6704-75cb-4e3a-8c9d-7e178e72c1bf,2018,511611,51741,83ea4070-60b2-4c30-ab3d-6dfd931c41a5,861611,non_city
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7073,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,male,1960-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,2017,39729,3000,8e794ada-93cd-47ed-b95d-f1856dfd993e,389729,chittagong
7074,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,male,1960-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,2018,15258,3000,9e30421f-d9e9-4b29-a917-1a7bded16382,365258,chittagong
7075,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,male,1960-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,2019,0,0,8a2642d4-fc4a-4b0d-82ed-8233a301a96b,339102,chittagong
7076,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,male,1960-01-01,User Name,Password,<sqlalchemy.orm.state.InstanceState object at ...,4eecf54b-1df4-4eb9-8d60-2c15f3461b19,2020,0,0,1831d136-5ad4-47bc-9851-41c0cb15b2bd,327338,chittagong


In [11]:
# Keep only the columns needed for analysis. Everything else from the merge
# (ORM state objects, id, user_id, tax_id, name, password, date_of_birth) is
# discarded.
#
# The wanted columns are selected explicitly rather than dropping the unwanted
# ones in place, so this cell can be re-run safely: a second execution selects
# the same six columns again instead of raising KeyError on columns that were
# already removed. It also fixes the column order of the exported CSV.
ANALYSIS_COLUMNS = [
    "gender",
    "year",
    "taxable_income",
    "tax_amount",
    "income",
    "location",
]

# .copy() makes the result an independent frame rather than a selection that
# is still tied to the merged frame.
df = df[ANALYSIS_COLUMNS].copy()

In [12]:
# Preview the first rows of the reduced analysis frame.
df.head()

Unnamed: 0,gender,year,taxable_income,tax_amount,income,location
0,Male,2023,40000,10000,50000,New York
1,male,2015,551912,57786,901912,non_city
2,male,2016,514972,52245,864972,non_city
3,male,2017,552304,57845,902304,non_city
4,male,2018,511611,51741,861611,non_city


In [13]:
# Export the analysis frame to data.csv in the working directory, leaving the
# row index out of the file.
df.to_csv(path_or_buf="data.csv", index=False)