### **INTERACCION CON MYSQL A TRAVÉS DE CLASES**

### LIBRERÍAS NECESARIAS:

In [1]:
import pandas as pd
import mysql.connector

### CLASE PRINCIPAL:

In [2]:
class MySQLConnection:
    def __init__(self, host, user, password, database):
        self.config = {
            'host': host,
            'user': user,
            'password': password,
            'database': database,
            'raise_on_warnings': True
        }
        self.connection = None
        self.cursor = None

    def connect(self):
        self.connection = mysql.connector.connect(**self.config)
        self.cursor = self.connection.cursor()
        

    def disconnect(self):
        if self.connection.is_connected():
            self.cursor.close()
            self.connection.close()

    def crear_database(self, database):
        self.connect()
        self.cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database};")
        self.disconnect()

    def crear_tablas(self, query, database):
        self.connect()
        query_database = f"USE {database};"
        self.cursor.execute(query_database)
        self.cursor.execute(query)
        self.disconnect()

    def ejecutar_consulta(self, database, query):
        self.connect()
        self.cursor.execute(f"USE {database};")
        self.cursor.execute(query)
    
        resultados = self.cursor.fetchall()
    
        self.connection.commit()
        self.disconnect()
        return resultados
    
    def insertar_datos_porfila(self, df, database, tabla, primary_key):
        self.connect()
        self.cursor.execute(f"USE {database};")
        self.cursor.execute(f"SELECT * FROM {tabla} LIMIT 0;")

        k = 0 if primary_key else 1
        
        column_names = self.cursor.column_names[k:]
        self.cursor.fetchall()
        # Formatear los nombres de las columnas como una cadena
        column_names_str = ', '.join(column_names)

        # Preparar la plantilla de la consulta de inserción
        placeholders = ', '.join(['%s'] * len(column_names))
        query = f"INSERT INTO {tabla} ({column_names_str}) VALUES ({placeholders});"
        

        # Convertir el DataFrame a una lista de tuplas
        valores = [tuple(fila) for fila in df.values]

        # Ejecutar la consulta utilizando executemany
        self.cursor.executemany(query, valores)
        self.connection.commit()
        self.disconnect()
    
    def obtener_columnas(self, tabla, database, primary_key = False):
        self.connect()
        self.cursor.execute(f"USE {database};")
        self.cursor.execute(f"SELECT * FROM {tabla} LIMIT 0;")

        k = 0 if primary_key else 1
        
        column_names = self.cursor.column_names[k:]
        self.cursor.fetchall() # Vaciamos el cursor 
        return column_names

### QUERYS:

In [3]:
query_customers = """CREATE table customers (
	customer_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
    age INT UNSIGNED,
    gender ENUM ( "Male", "Female", "Non-gender" ),
    location VARCHAR ( 100 ),
    subscription_status BOOL,
    preferred_payment_method  ENUM ( 'Venmo', 'Cash', 'Credit Card', 'PayPal', 'Bank Transfer', 'Debit Card' ),
    frequency_of_purchases ENUM ( 'Fortnightly', 'Weekly', 'Annually', 'Quarterly', 
    'Bi-Weekly', 'Monthly', 'Every 3 Months', 'Every 6 Months', 'Sporadic' ),
    PRIMARY KEY ( customer_id )
    );"""
query_categories = """CREATE table categories (
	category_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
    category ENUM ( 'Clothing', 'Footwear', 'Outerwear', 'Accessories' ),
    PRIMARY KEY ( category_id )
	);"""
query_payments = """CREATE table payments (
	payment_method_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
    payment_method ENUM ( 'Credit Card', 'Bank Transfer', 'Cash', 'PayPal', 'Venmo', 'Debit Card' ),
    PRIMARY KEY ( payment_method_id )
    );"""
query_shipping = """CREATE table shipping (
	shipping_type_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
    shipping_type ENUM ( 'Express', 'Free Shipping', 'Next Day Air', 'Standard', '2-Day Shipping', 'Store Pickup' ),
    PRIMARY KEY ( shipping_type_id )
    );"""
query_characteristics = """CREATE table characteristics (
	characteristic_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
	size ENUM ( 'L', 'S', 'M', 'XL' ),
	color ENUM ( 'Gray', 'Maroon', 'Turquoise', 'White', 'Charcoal', 'Silver', 'Pink', 'Purple', 'Olive', 
		'Gold', 'Violet', 'Teal', 'Lavender', 'Black', 'Green', 'Peach', 'Red', 'Cyan', 'Brown', 'Beige',
		'Orange', 'Indigo', 'Yellow', 'Magenta', 'Blue' ),
	season ENUM ( 'Winter', 'Spring', 'Summer', 'Fall' ),
    PRIMARY KEY ( characteristic_id )
    );"""
query_items = """CREATE table items (
	item_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
	item_purchased ENUM ( 'Blouse', 'Sweater', 'Jeans', 'Sandals', 'Sneakers', 'Shirt', 'Shorts', 'Coat', 'Handbag', 
		'Shoes', 'Dress', 'Skirt', 'Sunglasses', 'Pants', 'Jacket', 'Hoodie', 'Jewelry', 'T-shirt', 'Scarf', 'Hat', 
        'Socks', 'Backpack', 'Belt', 'Boots', 'Gloves' ),
	category_id INT UNSIGNED,
	characteristic_id INT UNSIGNED,
    PRIMARY KEY ( item_id ),
    FOREIGN KEY ( category_id ) REFERENCES categories ( category_id ) ON DELETE SET NULL,
    FOREIGN KEY ( characteristic_id ) REFERENCES characteristics ( characteristic_id ) ON DELETE SET NULL
    );"""
query_purchases = """CREATE table purchases (
	purchase_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
    purchase_amount_usd INT UNSIGNED,   
    discount_applied BOOL,
	promo_code_used BOOL,
	previous_purchases INT UNSIGNED,
	customer_id INT UNSIGNED,
	item_id INT UNSIGNED,
	payment_method_id INT UNSIGNED,
	shipping_type_id INT UNSIGNED,
    PRIMARY KEY ( purchase_id ),
    FOREIGN KEY ( customer_id ) REFERENCES customers (customer_id) ON DELETE SET NULL,
    FOREIGN KEY ( item_id ) REFERENCES items (item_id) ON DELETE SET NULL,
    FOREIGN KEY ( payment_method_id ) REFERENCES payments (payment_method_id) ON DELETE SET NULL,
    FOREIGN KEY ( shipping_type_id ) REFERENCES shipping (shipping_type_id) ON DELETE SET NULL
    );"""
query_reviews = """CREATE table reviews (
	review_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
	customer_id INT UNSIGNED,
	item_id INT UNSIGNED,
	review_rating DECIMAL ( 2,1 ),
    PRIMARY KEY ( review_id ),
    FOREIGN KEY ( customer_id ) REFERENCES customers ( customer_id ) ON DELETE SET NULL,
    FOREIGN KEY ( item_id ) REFERENCES items ( item_id ) ON DELETE SET NULL
    );"""

### CREACIÓN:

**CREANDO CONEXIÓN:**

In [4]:
conexion_mysql = MySQLConnection(host='localhost', user='root', password='2JTovldO', database=None)

**CREANDO DATABASE**

In [5]:
conexion_mysql.crear_database("shopping_trends")

**CREANDO TABLAS**

In [6]:
conexion_mysql.crear_tablas(query_customers, "shopping_trends")
conexion_mysql.crear_tablas(query_categories, "shopping_trends")
conexion_mysql.crear_tablas(query_payments, "shopping_trends")
conexion_mysql.crear_tablas(query_shipping, "shopping_trends")
conexion_mysql.crear_tablas(query_characteristics, "shopping_trends")
conexion_mysql.crear_tablas(query_items, "shopping_trends")
conexion_mysql.crear_tablas(query_purchases, "shopping_trends")
conexion_mysql.crear_tablas(query_reviews, "shopping_trends")

### MANEJO DE DF:

**MODIFICACIÓN DE DATOS**

In [25]:
shop = pd.read_csv("../data/csv/shopping_trends.csv")
shop.columns = [col.lower().replace(' ', '_') for col in shop.columns]

In [26]:
#Modificando el tipo de dato a booleano
for i in range(len(shop)):
    if shop["subscription_status"][i] == "Yes":
        shop["subscription_status"][i] = True
    else:
        shop["subscription_status"][i] = False

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["subscription_status"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["subscription_status"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["subscription_status"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["subscription_status"][i] = True
A value is tryin

In [27]:
#Modificando el tipo de dato a booleano
for i in range(len(shop)):
    if shop["discount_applied"][i] == "Yes":
        shop["discount_applied"][i] = True
    else:
        shop["discount_applied"][i] = False

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["discount_applied"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["discount_applied"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["discount_applied"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["discount_applied"][i] = True
A value is trying to be set 

In [40]:
#Modificando el tipo de dato a booleano
for i in range(len(shop)):
    if shop["promo_code_used"][i] == "Yes":
        shop["promo_code_used"][i] = True
    else:
        shop["promo_code_used"][i] = False

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["promo_code_used"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["promo_code_used"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["promo_code_used"][i] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shop["promo_code_used"][i] = True
A value is trying to be set on a

In [28]:
shop = shop.rename(columns={'purchase_amount_(usd)': 'purchase_amount_usd'})

In [29]:
shop.head(1)

Unnamed: 0,customer_id,age,gender,item_purchased,category,purchase_amount_usd,location,size,color,season,review_rating,subscription_status,payment_method,shipping_type,discount_applied,promo_code_used,previous_purchases,preferred_payment_method,frequency_of_purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,True,Credit Card,Express,True,Yes,14,Venmo,Fortnightly


**OBTENCIÓN DE COLUMNAS DE CADA TABLA**

In [7]:
columnas_customers = conexion_mysql.obtener_columnas("customers", "shopping_trends", False)
columnas_categories = conexion_mysql.obtener_columnas("categories", "shopping_trends", False)
columnas_payments = conexion_mysql.obtener_columnas("payments", "shopping_trends", False)
columnas_shipping = conexion_mysql.obtener_columnas("shipping", "shopping_trends", False)
columnas_characteristics = conexion_mysql.obtener_columnas("characteristics", "shopping_trends", False)
columnas_items = conexion_mysql.obtener_columnas("items", "shopping_trends", True)
columnas_purchases = conexion_mysql.obtener_columnas("purchases", "shopping_trends", True)
columnas_reviews = conexion_mysql.obtener_columnas("reviews", "shopping_trends", True)

**CREANDO COLUMNAS FALTANTES:**
1. category_id

In [30]:
# Crear un diccionario que mapea cada categoría única a un category_id único
category_unicas = shop['category'].unique()
diccionario_category_id = {category: i + 1 for i, category in enumerate(category_unicas)}

# Crear la nueva columna 'category_id' utilizando el mapeo
shop['category_id'] = shop['category'].map(diccionario_category_id)

2. characteristic_id


In [31]:
characteristic_unicas = shop['customer_id'].unique()
shop['characteristic_id'] = characteristic_unicas

3. item_id

In [32]:
item_unicas = shop['customer_id'].unique()
diccionario_item_id = {item: i + 1 for i, item in enumerate(item_unicas)}
shop['item_id'] = shop['customer_id'].map(diccionario_item_id)

4. payment_method_id

In [35]:
payment_method_unicas = shop['payment_method'].unique()
diccionario_payment_method_id = {item: i + 1 for i, item in enumerate(payment_method_unicas)}
shop['payment_method_id'] = shop['payment_method'].map(diccionario_payment_method_id)

5. shipping_type_id

In [36]:
shipping_type_unicas = shop['shipping_type'].unique()
diccionario_shipping_type_id = {item: i + 1 for i, item in enumerate(shipping_type_unicas)}
shop['shipping_type_id'] = shop['shipping_type'].map(diccionario_shipping_type_id)

6. purchase_id

In [33]:
purchase_unicas = shop['customer_id'].unique()
shop['purchase_id'] = purchase_unicas

7. review_id

In [34]:
review_unicas = shop['customer_id'].unique()
shop['review_id'] = review_unicas

In [37]:
columnas_customers = list(columnas_customers)
columnas_categories = list(columnas_categories)
columnas_payments = list(columnas_payments)
columnas_shipping = list(columnas_shipping)
columnas_characteristics = list(columnas_characteristics)
columnas_items = list(columnas_items)
columnas_purchases = list(columnas_purchases)
columnas_reviews = list(columnas_reviews)

**CREANDO DFS INDEPENDIENTES**

In [41]:
df_customers = shop[columnas_customers].copy()
df_categories = pd.DataFrame({'category': shop['category'].unique()})
df_payments = pd.DataFrame({'payment_method': shop['payment_method'].unique()})
df_shipping = pd.DataFrame({'shipping_type': shop['shipping_type'].unique()})
df_characteristics = shop[columnas_characteristics].copy()
df_items = shop[columnas_items].copy()
df_purchases = shop[columnas_purchases].copy()
df_reviews = shop[columnas_reviews].copy()

### INSERCCIÓN DE DATOS:

In [23]:
conexion_mysql.insertar_datos_porfila(df_customers, "shopping_trends", "customers", False)
conexion_mysql.insertar_datos_porfila(df_customers, "shopping_trends", "customers", False)
conexion_mysql.insertar_datos_porfila(df_categories, "shopping_trends", "categories", False)
conexion_mysql.insertar_datos_porfila(df_payments, "shopping_trends", "payments", False)
conexion_mysql.insertar_datos_porfila(df_shipping, "shopping_trends", "shipping", False)
conexion_mysql.insertar_datos_porfila(df_characteristics, "shopping_trends", "characteristics", False)
conexion_mysql.insertar_datos_porfila(df_items, "shopping_trends", "items", True)
conexion_mysql.insertar_datos_porfila(df_purchases, "shopping_trends", "purchases", True)
conexion_mysql.insertar_datos_porfila(df_reviews, "shopping_trends", "reviews", True)