In [1]:
"""Code to format info on movies in a csv file for publication."""

# built-in libraries
import csv
import datetime
from email.message import EmailMessage
import os.path
import re
import smtplib
import sqlite3
import sys

# installed with pip
import pandas as pd
#from sqlalchemy import create_engine



In [2]:
# Create list of dictionaries for each row.
listings = []
scrapedir = '/Users/rayd/workspace/cinema/scrapes/'
scrapefile = 'fandango-2019-05-16.csv'
scrape_path = scrapedir + scrapefile
with open(scrape_path, 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        layout_dct = {
            'addy': row[0],
            'theater': row[1],
            'title': row[2],
            'mpaa': row[3],
            'stars': row[4],
            'times': row[5],
            'format': row[6],
            'synopsis': row[7],
            'date': row[8],
            'city': row[9]
        }
        listings.append(layout_dct)
        
del listings[0] #remove header row   

In [3]:
# Create Pandas dataframe for listings
df = pd.DataFrame(listings)
# Remove some duplicate listings
df = df[df.city != "Belleview, FL"]
df = df[df.city != "High Springs, FL"]
df = df[(df.city != "The Villages, FL") & (df.theater != "Belleview Cinemas")]
df = df[(df.city != "The Villages, FL") & (df.theater != "Old Mill Playhouse")]
df = df[(df.city != "The Villages, FL") & (df.theater != "Old Mill Playhouse")]
times = (df.times.str.split('|', expand=True).rename(columns=lambda x: f"time_{x+1}"))
times = times.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
times = times.apply(lambda x: x.str.replace('p', ' p.m.') if x.dtype == "object" else x)
times = times.apply(lambda x: x.str.replace('a', ' a.m.') if x.dtype == "object" else x)
times = times.apply(lambda x: x.str.replace(':00', '') if x.dtype == "object" else x)
df = df.drop('times', axis=1)
df = pd.concat([df, times], axis=1, join_axes=[df.index])
df['streetaddy'] = df.addy.str.split(',').str[0]
df.streetaddy = df.streetaddy.str.replace('Blvd', 'Blvd.')
df.streetaddy = df.streetaddy.str.replace('Blvd..', 'Blvd.')
df.streetaddy = df.streetaddy.str.replace('Avenue', 'Ave')
df.streetaddy = df.streetaddy.str.replace('Ave', 'Ave.')
df.streetaddy = df.streetaddy.str.replace('Ave..', 'Ave.')
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['movieday'] = df['date'].dt.weekday_name
df = df.drop('city', axis=1)
df['cityaddy'] = df.addy.str.split(',').str[1]
df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
df = df.drop('addy', axis=1)

In [4]:
# Ocala market
oc6_df = df.loc[df['theater'] == 'Ocala Center 6']
ocd_dr = df.loc[df['theater'] == 'Ocala Drive-In']
bv_df = df.loc[df['theater'] == 'Belleview Cinemas']
mt_df = df.loc[df['theater'] == 'Marion Theatre']
omp_df = df.loc[df['theater'] == 'Old Mill Playhouse']
regaloca_df = df.loc[df['theater'] == 'Regal Hollywood Stadium 16 & IMAX - Ocala']
rialto_df = df.loc[df['theater'] == 'Rialto Theatre Spanish Springs Town Square']
amc_df = df.loc[df['theater'] == 'AMC Lake Square 12']
barn_df = df.loc[df['theater'] == 'Barnstorm Theater']

# Gainesville market
celeb_df = df.loc[df['theater'] == 'Celebration Pointe 10']
hipp_df = df.loc[df['theater'] == 'Hippodrome State Theatre']
butler_df = df.loc[df['theater'] == 'Regal Butler Town Center 14']
regalgnv_df = df.loc[df['theater'] == 'Regal Royal Park Stadium 16']
uac_df = df.loc[df['theater'] == 'UA Cinema 90 6']
starke_df = df.loc[df['theater'] == 'Florida Twin']
uac_df = df.loc[df['theater'] == 'UA Cinema 90 6']

In [None]:
"""
This would be an approach using a database
"""
# Put new cinema scrape data into db
#db_directory = os.path.dirname(os.path.abspath(__file__))
#db_file = os.path.join(db_directory, "rinspect.sqlite")
db_file = "cinema_db.sqlite"
movie_table = 'movies' # table for movie data
conn = sqlite3.connect(db_file)
cur = conn.cursor()
df.to_sql('movies', conn, if_exists='replace')

# Build narrative by accessing database


In [None]:
"""This would be an approach using named tuples"""

In [None]:
"""This would be an approach using a dict of dicts"""

In [None]:
"""Useful functions, if needed"""

# Convert datetime object into day of the week; But this is accomplished in the df work already.
def get_day(date_object=None):
    if not date_object:
        date_object = datetime.datetime.now()
    stamp = ""
    stamp += datetime.datetime.strftime(date_object, "%A")
    return(stamp)

In [19]:
oc6_df.head()


Unnamed: 0,date,format,mpaa,stars,synopsis,theater,title,time_1,time_2,time_3,time_4,time_5,time_6,time_7,time_8,streetaddy,movieday,cityaddy
22,2019-05-16,Standard,PG-13,5.0,"Adrift in space with no food or water, Tony St...",Ocala Center 6,Avengers: Endgame (2019),12 p.m.,1 p.m.,3:45 p.m.,4:45 p.m.,7:30 p.m.,,,,2021 Silver Springs Blvd.,Thursday,Ocala
37,2019-05-16,Standard,R,4.0,When Fred Flarsky (Seth Rogen) reunites with h...,Ocala Center 6,Long Shot,1 p.m.,4 p.m.,7 p.m.,,,,,,2021 Silver Springs Blvd.,Thursday,Ocala
45,2019-05-16,Standard,PG,4.5,The story begins when ace detective Harry Good...,Ocala Center 6,Pokémon Detective Pikachu,1 p.m.,4 p.m.,7 p.m.,,,,,,2021 Silver Springs Blvd.,Thursday,Ocala
117,2019-05-16,Standard,PG,4.0,"Unconventionality rules in UGLYDOLLS, the new ...",Ocala Center 6,UglyDolls,1 p.m.,4 p.m.,7 p.m.,,,,,,2021 Silver Springs Blvd.,Thursday,Ocala
149,2019-05-16,Standard,PG-13,4.0,A psychological thriller about a young married...,Ocala Center 6,The Intruder (2019),1 p.m.,4 p.m.,7 p.m.,,,,,,2021 Silver Springs Blvd.,Thursday,Ocala


In [22]:
for row in oc6_df.itertuples():
    if row[17] == 'Thursday':
        print(row[8], row[9], row[10], row[11], row[12], row[13], row[14], row[15])

12 p.m. 1 p.m. 3:45 p.m. 4:45 p.m. 7:30 p.m. None None None
1 p.m. 4 p.m. 7 p.m. None None None None None
1 p.m. 4 p.m. 7 p.m. None None None None None
1 p.m. 4 p.m. 7 p.m. None None None None None
1 p.m. 4 p.m. 7 p.m. None None None None None


In [26]:
for row in oc6_df.itertuples():
    print(row[Index=22])

SyntaxError: invalid syntax (<ipython-input-26-098930ea7dde>, line 2)