In [1]:
import pandas as pd
import ast

# Notebook display settings: never elide columns, but cap printed frames at 10 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10


In [2]:
# Load the two CSV inputs.
episodes = pd.read_csv('Datasets/episodes.csv')
paintings = pd.read_csv('Datasets/paintings.csv')

# 'Datasets/dates' is parsed downstream as free text, one entry per line, so it is
# read line by line instead of with pd.read_fwf. read_fwf guesses fixed-width column
# boundaries from the first 100 rows only, which can split a line into several
# columns or truncate later lines that are longer than the sampled ones.
# 'utf-8-sig' reads plain UTF-8 and also discards a leading byte-order mark.
# The single column keeps the integer label 0, matching what header=None produced.
with open('Datasets/dates', encoding='utf-8-sig') as dates_file:
    date_lines = [line.strip() for line in dates_file]
dates = pd.DataFrame({0: [line for line in date_lines if line]})


In [3]:
# Give the single raw-text column a readable name while it is being parsed.
dates = dates.rename(columns={0: 'combined'})

# Each line is expected to start with a double-quoted title followed by a
# parenthesised date; anything after the closing parenthesis is not captured.
pattern = r'^"([^"]+)" \(([^)]+)\)'
title_and_date = dates['combined'].str.extract(pattern, expand=True)

# Capture group 0 is the title, group 1 the date text (parsed to datetime here).
dates = dates.assign(
    Title=title_and_date[0],
    Date=pd.to_datetime(title_and_date[1]),
)

# Full month name (e.g. 'January') derived from the parsed date.
dates['Month'] = dates['Date'].dt.month_name()

# The raw text is no longer needed once its parts have been extracted.
dates = dates.drop(columns=['combined'])


In [4]:
# Title-case every episodes header. This turns a header such as 'TITLE' into
# 'Title' directly, so no separate rename of 'TITLE' is needed afterwards
# (no header can still be spelled 'TITLE' once .str.title() has run).
episodes.columns = episodes.columns.str.title()

# paintings keeps its title under 'painting_title'; give it the shared key name.
paintings = paintings.rename(columns={'painting_title': 'Title'})

# Title-case the titles in all three frames so they compare equal when merging.
dates['Title'] = dates['Title'].str.title()
paintings['Title'] = paintings['Title'].str.title()

# Episode titles additionally lose their double quotes. regex=False makes the
# literal replacement explicit instead of relying on the pandas version's default.
episodes['Title'] = episodes['Title'].str.title().str.replace('"', '', regex=False)

# 'colors' holds the text of a Python list literal. Parse it, remove the '\r\n'
# sequences embedded in each entry, and store the entries as one comma-separated
# string. Joining the entries directly replaces the earlier round trip through
# str(list) followed by regex-stripping the quotes and brackets.
paintings['colors'] = paintings['colors'].apply(
    lambda raw: ', '.join(name.replace('\r\n', '') for name in ast.literal_eval(raw))
)

# 'color_hex' stays as text; only the list punctuation (single quotes and square
# brackets) is removed. The raw string keeps '\[' and '\]' as regex escapes rather
# than invalid Python string escapes, which newer interpreters warn about.
paintings['color_hex'] = paintings['color_hex'].str.replace(r"['\[\]]", '', regex=True)




In [5]:
# Names of the columns that flag which frame was used
frame_columns = [
    "Apple_Frame",
    "Circle_Frame",
    "Double_Oval_Frame",
    "Florida_Frame",
    "Framed",
    "Half_Circle_Frame",
    "Half_Oval_Frame",
    "Oval_Frame",
    "Rectangle_3D_Frame",
    "Rectangular_Frame",
    "Seashell_Frame",
    "Split_Frame",
    "Tomb_Frame",
    "Triple_Frame",
    "Wood_Framed",
]


def identify_frames(row):
    """Summarise the frame flags of one row as a single string.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by every
            name in ``frame_columns``.

    Returns:
        The names of the frame columns whose value equals 1, joined with
        ', ' in ``frame_columns`` order, or the string 'None' when no flag
        is set.
    """
    flagged = []
    for column in frame_columns:
        if row[column] == 1:
            flagged.append(column)
    if not flagged:
        return 'None'
    return ', '.join(flagged)

  # Apply the function to the episodes DataFrame
frame_labels = episodes.apply(identify_frames, axis=1)
# One summary string per episode row, stored alongside the individual flag columns.
episodes['Frame'] = frame_labels



In [6]:
# Join the air dates onto the painting metadata by title, then attach the
# episodes table (including its 'Frame' summary) the same way.
# NOTE(review): these are default (inner) joins on 'Title', so unmatched titles are
# dropped and repeated titles multiply rows - confirm titles are unique per frame.
merged_df = dates.merge(paintings, on='Title')
final_df = merged_df.merge(episodes, on='Title')

# Discard the lowercase season/episode columns, the 'Unnamed: 0' column, and the
# individual frame flags (already condensed into 'Frame').
leftover_columns = ['season', 'episode', 'Unnamed: 0'] + frame_columns
final_df = final_df.drop(columns=leftover_columns)

# Final column layout, built from three groups that are concatenated in order.
# Identification and descriptive columns come first.
leading_columns = [
    'Title', 'Date', 'Month', 'Episode', 'painting_index', 'img_src', 'youtube_src',
    'num_colors', 'colors', 'color_hex', 'Frame',
]
# Presumably one indicator column per paint colour - verify against the dataset.
paint_columns = [
    'Black_Gesso', 'Bright_Red', 'Burnt_Umber', 'Cadmium_Yellow', 'Dark_Sienna',
    'Indian_Red', 'Indian_Yellow', 'Liquid_Black', 'Liquid_Clear', 'Midnight_Black',
    'Phthalo_Blue', 'Phthalo_Green', 'Prussian_Blue', 'Sap_Green', 'Titanium_White',
    'Van_Dyke_Brown', 'Yellow_Ochre', 'Alizarin_Crimson',
]
# Presumably one indicator column per subject/element - verify against the dataset.
subject_columns = [
    'Aurora_Borealis', 'Barn', 'Beach', 'Boat', 'Bridge', 'Building', 'Bushes',
    'Cabin', 'Cactus', 'Cirrus', 'Cliff', 'Clouds', 'Conifer', 'Cumulus',
    'Deciduous', 'Diane_Andre', 'Dock', 'Farm', 'Fence', 'Fire', 'Flowers', 'Fog',
    'Grass', 'Guest', 'Hills', 'Lake', 'Lakes', 'Lighthouse', 'Mill', 'Moon',
    'Mountain', 'Mountains', 'Night', 'Ocean', 'Palm_Trees', 'Path', 'Person',
    'Portrait', 'River', 'Rocks', 'Snow', 'Snowy_Mountain', 'Steve_Ross',
    'Structure', 'Sun', 'Tree', 'Trees', 'Waterfall', 'Waves', 'Windmill', 'Winter',
]
ordered_columns = leading_columns + paint_columns + subject_columns

# Keep exactly these columns, in this order (a missing column raises KeyError).
final_df = final_df[ordered_columns]

# Write the combined table to disk without the row index.
final_df.to_csv('joyofpainting.csv', index=False)
