## Loading the Dataset
### This code snippet utilizes Pandas' read_csv function to load a dataset into a DataFrame.


In [1]:
import pandas as pd

# Location of the CSV file that holds the dataset
file_path = 'Mistral.csv'

# Parse the CSV into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

# Confirm the load and preview the top rows of the DataFrame
print("Dataset loaded successfully.", df.head(), sep="\n")

Dataset loaded successfully.
  TransactionID CustomerID CustomerDOB CustGender CustLocation  \
0            T1   C5841053     10/1/94       Male   JAMSHEDPUR   
1            T2   C2142763      4/4/57     Female      JHAJJAR   
2            T3   C4417068    26/11/96     Female       MUMBAI   
3            T4   C5342380     14/9/73     Female       MUMBAI   
4            T5   C9031234     24/3/88       Male  NAVI MUMBAI   

   CustAccountBalance TransactionDate  TransactionTime  STATUS_ bplayer0  \
0            17819.05      2023-02-08           143207      1.0    Vayne   
1             2270.69      2023-02-08           141858      1.0     Kled   
2            17874.44      2023-02-08           142712      1.0   Darius   
3           866503.21      2023-02-08           142714      1.0   Singed   
4             6714.43      2023-02-08           181156      1.0    Urgot   

   product_id          category_id                        category_code  \
0     3900821  2053013552326770905  applia

## Preprocessing Data
### Drop rows with missing values, remove duplicate rows, reset the index, and preview the cleaned DataFrame

In [2]:
# Clean the dataset in a single chained pass:
#   1. dropna()          - discard every row that has at least one missing value
#   2. drop_duplicates() - discard rows that are exact duplicates of an earlier row
#   3. reset_index()     - renumber the surviving rows from 0, discarding the old index
df = (
    df
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Preview the top rows of the cleaned DataFrame
print("Cleaned dataset:", df.head(), sep="\n")

Cleaned dataset:
  TransactionID CustomerID CustomerDOB CustGender CustLocation  \
0            T1   C5841053     10/1/94       Male   JAMSHEDPUR   
1            T2   C2142763      4/4/57     Female      JHAJJAR   
2            T3   C4417068    26/11/96     Female       MUMBAI   
3            T4   C5342380     14/9/73     Female       MUMBAI   
4            T5   C9031234     24/3/88       Male  NAVI MUMBAI   

   CustAccountBalance TransactionDate  TransactionTime  STATUS_ bplayer0  \
0            17819.05      2023-02-08           143207      1.0    Vayne   
1             2270.69      2023-02-08           141858      1.0     Kled   
2            17874.44      2023-02-08           142712      1.0   Darius   
3           866503.21      2023-02-08           142714      1.0   Singed   
4             6714.43      2023-02-08           181156      1.0    Urgot   

   product_id          category_id                        category_code  \
0     3900821  2053013552326770905  appliances.environ

### Retrieve dataset dimensions, construct descriptive info, and print it.






In [3]:
# Row count comes from the index, column count from the column labels
num_rows = len(df.index)
num_cols = len(df.columns)

# Build a one-line summary of the dataset size and show it
dataset_info = f"Dataset contains {num_rows} rows and {num_cols} columns."
print(dataset_info)


Dataset contains 994284 rows and 17 columns.


## Create the PostgreSQL database (if it does not already exist) using psycopg2






In [5]:

import os
from getpass import getpass

import psycopg2
from psycopg2 import sql

# Database connection parameters.
# The password is taken from the PGPASSWORD environment variable, falling back
# to an interactive prompt, so that no credential is stored in the notebook.
dbname = 'transaction_data_db'
user = 'postgres'
password = os.environ.get('PGPASSWORD') or getpass(f"PostgreSQL password for '{user}': ")
host = 'localhost'
port = '5432'

# Connect to the default database (postgres) to create a new database
conn = psycopg2.connect(dbname='postgres', user=user, password=password, host=host, port=port)
try:
    # CREATE DATABASE cannot run inside a transaction block, so autocommit is
    # enabled before any statement is issued.
    conn.autocommit = True
    with conn.cursor() as cur:
        # Look the database up by name; the value is bound as a query parameter
        # instead of being interpolated into the SQL text.
        cur.execute("SELECT 1 FROM pg_catalog.pg_database WHERE datname = %s", (dbname,))
        exists = cur.fetchone()
        if not exists:
            # Identifiers cannot be bound as parameters, so the name is quoted
            # safely with psycopg2.sql.Identifier.
            cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(dbname)))
            print(f"Database '{dbname}' created successfully.")
        else:
            print(f"Database '{dbname}' already exists.")
finally:
    # Always release the connection, even if one of the statements above fails.
    conn.close()


Database 'transaction_data_db' created successfully.


## Create a SQLAlchemy engine for the PostgreSQL database






In [6]:

from urllib.parse import quote_plus

from sqlalchemy import create_engine

# Build the PostgreSQL connection URL. The user name and password are
# percent-encoded so that characters such as '@', ':' or '/' inside them are
# not mistaken for URL delimiters.
db_url = (
    f'postgresql://{quote_plus(user)}:{quote_plus(password)}'
    f'@{host}:{port}/{dbname}'
)

# Create the engine. No connection is opened here; SQLAlchemy connects lazily
# the first time the engine is actually used.
engine = create_engine(db_url)


## Write DataFrame to a PostgreSQL table

In [7]:

# Define the table name in the database
table_name = 'transaction_data_table'

# Write the DataFrame to the PostgreSQL database
df.to_sql(table_name, engine, if_exists='replace', index=False)

# Print a message to confirm the data import
print("Dataset loaded successfully into PostgreSQL database.")


Dataset loaded successfully into PostgreSQL database.
