## Data preparation and export 

### 1. Import libraries 

In [1]:

import pandas as pd
import mysql.connector
from sqlalchemy import create_engine

### 2. Data import and preparation

#### 2.1 artists

In [None]:
# Load the artist table from its CSV export into a DataFrame.
df_artist = pd.read_csv("artist.csv")
# Preview the first rows to sanity-check the columns and values.
df_artist.head()

In [None]:
# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_artist.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
# Nulls are expected only in middle_names, a column that is dropped in the next step.
print(df_artist.isnull().sum())


In [4]:
# Remove the middle_names column in place; it is not needed downstream.
df_artist.drop(columns=["middle_names"], inplace=True)

In [None]:
# Inspect the data type of each column. The birth and death columns refer to
# dates but hold only a year, so they are kept as integers for now.
df_artist.dtypes

#### 2.2 canvas_size

In [None]:
# Load the canvas size table from its CSV export into a DataFrame.
df_canvas_size = pd.read_csv("canvas_size.csv")
# Preview the first rows to sanity-check the columns and values.
df_canvas_size.head()

In [None]:
# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_canvas_size.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
# Every painting is expected to have a height value, so rows with nulls are
# dropped in the next step.
print(df_canvas_size.isnull().sum())

In [8]:
# Discard, in place, every row that contains at least one missing value.
# NOTE(review): the preceding null check is about the height column, but
# dropna() without `subset` removes rows with a null in ANY column — confirm
# that this is the intended behaviour.
df_canvas_size.dropna(inplace=True)

In [None]:
# Inspect the data type of each column.
df_canvas_size.dtypes

#### 2.3 image_link

In [None]:
# Load the image link table from its CSV export into a DataFrame.
df_image_link = pd.read_csv("image_link.csv")
# Preview the first rows to sanity-check the columns and values.
df_image_link.head()

In [None]:
# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_image_link.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_image_link.isnull().sum())


In [None]:
# Discard, in place, every row that contains at least one missing value,
# then inspect the data type of each remaining column.
df_image_link.dropna(inplace=True)
df_image_link.dtypes

#### 2.4 museum_hours

In [None]:
# Load the museum opening hours from their CSV export into a DataFrame.
df_museum_hours = pd.read_csv("museum_hours.csv")
# The opening and closing hours include an "AM"/"PM" marker in their values;
# the following cells strip it and convert the times to 24-hour format.
# Preview the first rows to sanity-check the columns and values.
df_museum_hours.head()

In [None]:
# Split each opening/closing value into its period marker and its clock time:
# the last 2 characters go to a new period column, and the last 3 characters
# (the marker plus the one character in front of it) are cut from the time.
# The period is taken from the untouched values before the time column is
# overwritten.
for time_col, period_col in (('open', 'openTime'), ('close', 'CloseTime')):
    raw_values = df_museum_hours[time_col]
    df_museum_hours[period_col] = raw_values.str.slice(start=-2)
    df_museum_hours[time_col] = raw_values.str.slice(stop=-3)

df_museum_hours.head()

In [None]:
# Function to convert to 24-hour time
def convert_to_24_hour(time, period):
    """Convert a 12-hour clock reading to a zero-padded 24-hour ``HH:MM`` string.

    Args:
        time: Clock time written as ``"H:MM"`` or ``"HH:MM"`` (12-hour clock).
        period: ``'AM'`` or ``'PM'``; any other value leaves the hour unchanged.

    Returns:
        The time in 24-hour notation, e.g. ``"17:00"`` for ``("05:00", "PM")``.
    """
    starts_with_twelve = time[:2] == '12'

    if period == 'AM' and starts_with_twelve:
        # 12:xx AM is the first hour of the day; only the minutes are parsed.
        hour = 0
        minute = int(time.split(':')[1])
    else:
        hour, minute = (int(part) for part in time.split(':'))
        if period == 'PM' and not starts_with_twelve:
            # 1 PM to 11 PM move forward by twelve hours; 12 PM stays at 12.
            hour += 12

    return f'{hour:02}:{minute:02}'  # Ensure 2-digit formatting

# Build the 24-hour columns row by row: each row's clock time and period
# marker are passed to convert_to_24_hour and the result is stored in a new
# column next to the originals.
for time_col, period_col, target_col in (
    ('open', 'openTime', 'open_24hr'),
    ('close', 'CloseTime', 'close_24hr'),
):
    df_museum_hours[target_col] = df_museum_hours.apply(
        lambda row, t=time_col, p=period_col: convert_to_24_hour(row[t], row[p]),
        axis=1,
    )

# Preview the frame with the two new columns.
df_museum_hours.head()

In [None]:
# Remove the 12-hour source columns and their period markers, then give the
# 24-hour columns the original column names.
obsolete_columns = ['open', 'close', 'openTime', 'CloseTime']
df_museum_hours.drop(columns=obsolete_columns, inplace=True)

restored_names = {'open_24hr': 'open', 'close_24hr': 'close'}
df_museum_hours.rename(columns=restored_names, inplace=True)

df_museum_hours.head()

In [None]:

# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_museum_hours.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_museum_hours.isnull().sum())


In [None]:
# Inspect the data type of each column.
df_museum_hours.dtypes

#### 2.5 museum

In [None]:
# Load the museum table from its CSV export into a DataFrame.
df_museum = pd.read_csv("museum.csv")
# Preview the first rows to sanity-check the columns and values.
df_museum.head()

In [None]:


# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_museum.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_museum.isnull().sum())



In [None]:
# Inspect the data type of each column.
df_museum.dtypes

#### 2.6 product_size

In [None]:
# Load the product size table from its CSV export into a DataFrame.
df_product_size = pd.read_csv("product_size.csv")
# Preview the first rows to sanity-check the columns and values.
df_product_size.head()

In [None]:
# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_product_size.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_product_size.isnull().sum())

In [None]:
# Inspect the data type of each column.
df_product_size.dtypes

#### 2.7 subject

In [None]:
# Load the subject table from its CSV export into a DataFrame.
df_subject = pd.read_csv("subject.csv")
# Preview the first rows to sanity-check the columns and values.
df_subject.head()

In [None]:
# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_subject.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_subject.isnull().sum())

In [None]:
# Inspect the data type of each column.
df_subject.dtypes

#### 2.8 work

In [None]:
# Load the work table from its CSV export into a DataFrame.
df_work = pd.read_csv("work.csv")
# Preview the first rows to sanity-check the columns and values.
df_work.head()

In [None]:

# Report the frame's dimensions, then the number of missing values per column.
n_rows, n_cols = df_work.shape
print(f"This df has {n_rows} rows and {n_cols} columns.")
print("v---------Null Values---------v")
print(df_work.isnull().sum())


In [None]:
# Inspect the data type of each column.
df_work.dtypes

### 3. MySQL connection

In [None]:
# Connect to the MySQL server.
# The host, user and password are read from the MYSQL_HOST, MYSQL_USER and
# MYSQL_PASSWORD environment variables so that no credential is stored in the
# notebook. Host and user fall back to "localhost" and "root"; if no password
# is set in the environment, it is requested interactively.
import os
from getpass import getpass

mydb = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: "),
)

print(mydb)  # Verify the connection


In [None]:
# Open a cursor on the server connection and make sure the target database
# exists; the statement does nothing when the database is already there.
mycursor = mydb.cursor()
create_db_sql = "CREATE DATABASE IF NOT EXISTS sql_paintings"
mycursor.execute(create_db_sql)
print("Database created or already exists.")  # Confirmation message

In [None]:
# List the databases on the server; the database created in the previous
# step should appear in the output.
mycursor.execute("SHOW DATABASES")
for db_row in mycursor.fetchall():
    print(db_row)

In [None]:
# Store the prepared dataframes in MySQL, one table per dataframe.
#
# Credentials are read from the MYSQL_HOST, MYSQL_USER and MYSQL_PASSWORD
# environment variables (the password is requested interactively when unset)
# so that no secret is embedded in the connection URL kept in the notebook.
import os
from getpass import getpass
from urllib.parse import quote_plus

db_host = os.environ.get("MYSQL_HOST", "localhost")
db_user = os.environ.get("MYSQL_USER", "root")
db_password = os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: ")

# User and password are URL-escaped because characters such as "@" or "/"
# would otherwise break the connection URL. The database name matches the one
# created with CREATE DATABASE earlier in this notebook (sql_paintings).
engine = create_engine(
    f"mysql+mysqlconnector://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/sql_paintings"
)

# Export the dataframes prepared in section 2 rather than re-reading the raw
# CSV files, so the dropped columns/rows and the 24-hour opening times are
# what ends up in the database.
cleaned_tables = {
    'artist': df_artist,
    'canvas_size': df_canvas_size,
    'image_link': df_image_link,
    'museum_hours': df_museum_hours,
    'museum': df_museum,
    'product_size': df_product_size,
    'subject': df_subject,
    'work': df_work,
}

# engine.begin() yields a connection and commits when the block exits cleanly.
with engine.begin() as connection:
    for table_name, frame in cleaned_tables.items():
        # if_exists='replace' recreates the table on every run; the dataframe
        # index is not written as a column.
        frame.to_sql(table_name, con=connection, if_exists='replace', index=False)
