# Cineman Streamlit Project

## Load Dependencies

In [1]:
# Setup: conda install -c conda-forge selenium
# Firefox automation also requires geckodriver (Mozilla's WebDriver for Firefox) to be installed and on the PATH.

import selenium
from selenium import webdriver
#from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys

import requests
from bs4 import BeautifulSoup

import pandas as pd
import time
from datetime import date
import re

In [2]:
def isTimeFormat(input):
    """Return True if ``input`` can be parsed by ``time.strptime`` as ``'%H:%M'``.

    Parameters
    ----------
    input : object
        Candidate token. Values that are not strings are rejected instead of
        raising.

    Returns
    -------
    bool
        True when the value parses as an hour:minute time, False otherwise.
    """
    # The parameter name shadows the builtin ``input``; it is kept so that
    # existing keyword callers keep working.
    try:
        time.strptime(input, '%H:%M')
    except (TypeError, ValueError):
        # ValueError: the text does not match the format.
        # TypeError: the value is not a string at all (e.g. None).
        return False
    return True

## Scraping Cineman Data

### Loading and Saving Page Contents

In [3]:
# Load the showtimes page in Firefox, click through the page controls and
# keep the resulting HTML in `content`.
driver = webdriver.Firefox()
try:
    driver.get("https://www.cineman.ch/en/showtimes/city/")
    time.sleep(7)  # fixed pause; presumably lets the page and cookie banner finish loading

    # Dismiss the cookie banner before touching the other controls.
    cookie_button = driver.find_element_by_class_name("cc-btn")
    cookie_button.click()
    time.sleep(3)

    # NOTE(review): the third "text-overflow-hidden" element is assumed to be
    # the wanted sort option; the index is position-dependent, so confirm it
    # against the live page.
    sorting_buttons = driver.find_elements_by_class_name("text-overflow-hidden")
    sorting_buttons[2].click()

    # Open the region selector.
    region_dropdown = driver.find_element_by_class_name("selectize-control")
    region_dropdown.click()

    # NOTE(review): the seventh text input is assumed to be the region search
    # box; confirm, the index is position-dependent.
    input_div = driver.find_elements_by_xpath('//input[@type="text"]')
    input_div[6].send_keys("Zürich")
    input_div[6].send_keys(Keys.RETURN)

    save_button = driver.find_element_by_class_name("select-region-save")
    save_button.click()
    time.sleep(3)  # fixed pause; presumably lets the filtered listing render

    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is
    # installed; pin one explicitly once it has been checked against this page.
    content = BeautifulSoup(driver.page_source)
finally:
    # quit() ends the whole WebDriver session, and the finally clause makes
    # sure that happens even when one of the lookups or clicks above raises.
    driver.quit()

### Extracting Movie Showtimes

In [12]:
# Build one row per movie block on the scraped page. Every per-movie list
# below receives exactly one entry per loop iteration so that all DataFrame
# columns end up with the same length.
movies = content.find_all("div", {"class": "col-xs-12 col-sm-9"})

all_cinema_dicts = []  # NOTE(review): never filled in this cell; kept in case a later cell reads it
movies_list = []
genres_list = []
cinemas_list = []
places_list = []
age_limits = []
movie_links = []
all_showtimes_lists = []
all_languages_lists = []

for movie in movies:
    # Movie title
    movies_list.append(movie.find("h4").get_text())

    # Movie genre
    genres_list.append(movie.find("p").get_text())

    # Cinemas and place: each <h5> carries the cinema name in <em> and the
    # place in its second link.
    cinema_names = []
    places = []
    for cinema in movie.find_all("h5"):
        cinema_names.append(cinema.find("em").get_text())
        places.append(cinema.find_all("a")[1].get_text())
    cinemas_list.append(cinema_names)
    places_list.append(places)

    # Age limit: text of the last <a class="link"> in the block; None when the
    # block has no such link (previously an IndexError).
    age_links = movie.find_all("a", {"class": "link"})
    age_limits.append(age_links[-1].get_text() if age_links else None)

    # Movie link: first link with an href inside the block.
    movie_links_a = movie.find_all("a", href=True)
    movie_links.append(f'https://www.cineman.ch{movie_links_a[0]["href"]}')

    # Showtimes and languages: split the prettified showtimes markup on "h5"
    # and scan the whitespace-separated tokens of each chunk.
    showtimes_list = []
    languages_list = []
    showtimes_list_div = movie.find("div", {"class": "showtimes-list"})
    if showtimes_list_div is not None:
        chunks = showtimes_list_div.prettify().split("h5")
    else:
        chunks = []

    for chunk in chunks:
        tokens = re.sub('<[^<]+?>\n', '', chunk).split(" ")
        showtimes = []
        languages = []

        for token in tokens:
            # Remove leftover markup/punctuation characters, tabs and whitespace.
            token = token.strip("<></–)").replace("\t", "").strip()

            if isTimeFormat(token):
                showtimes.append(token)
            elif ("/" in token and "Y." not in token) or token in ["G", "F", "O"]:
                # Language codes either contain a slash (but are not age
                # ratings such as "12/6Y.") or are one of the single letters.
                languages.append(token)

        # Chunks without any showtime/language token are ignored.
        if showtimes:
            showtimes_list.append(showtimes)
        if languages:
            languages_list.append(languages)

    # Always append, even when empty: skipping a movie here would make these
    # two columns shorter than the others and break the DataFrame below.
    all_showtimes_lists.append(showtimes_list)
    all_languages_lists.append(languages_list)

# Assemble the columns once, after the loop, so this also works when no
# movie blocks were found.
all_info_dict = {
    "movie": movies_list,
    "genre": genres_list,
    "cinema": cinemas_list,
    "place": places_list,
    "movie_link": movie_links,
    "showtime": all_showtimes_lists,
    "age_limit": age_limits,
    "language": all_languages_lists,
}

movie_program_df = pd.DataFrame(all_info_dict)

### Loading and Saving Movie Ratings

In [28]:
# Visit every distinct movie link once and collect the title and Cineman
# rating shown on that page ("not available" when the element is missing).
umovie_links = movie_program_df["movie_link"].unique()

rating_list = []
title_list = []

for movie_link in umovie_links:
    # NOTE(review): PhantomJS support is deprecated in Selenium 3 and removed
    # in Selenium 4; a headless Firefox session is the usual replacement.
    driver = webdriver.PhantomJS()
    try:
        driver.get(movie_link)
        time.sleep(5)   # have to wait for the advertisement to end

        # Click the cookie banner when it is present; a page without one no
        # longer aborts the whole scrape.
        cookie_buttons = driver.find_elements_by_class_name("cc-btn")
        if cookie_buttons:
            cookie_buttons[0].click()

        content2 = BeautifulSoup(driver.page_source)
        # NOTE(review): this pause happens after the page source is captured,
        # so it does not affect the parsed content; presumably throttling.
        time.sleep(2)
    finally:
        # Release the browser process even if loading or clicking failed.
        driver.quit()

    # Title
    title_tag = content2.find("span", {"itemprop": "itemreviewed"})
    title_list.append(title_tag.get_text() if title_tag is not None else "not available")

    # Rating
    rating_tag = content2.find("strong", {"class": "color-playstation"})
    rating_list.append(rating_tag.get_text() if rating_tag is not None else "not available")

ratings_dict = {"movie": title_list, "rating": rating_list}

## Formatting the DataFrame

In [29]:
# The first explode turns the per-movie lists into one row per cinema; the
# second turns the remaining per-cinema showtime/language lists into one row
# per showing.
movie_program_df = (
    movie_program_df
    .explode(["cinema", "showtime", "place", "language"])
    .explode(["showtime", "language"])
)
# NOTE(review): the original cell contained a bare `movie_program_df["date"]`
# lookup here. No "date" column is ever created, so that line raises KeyError
# on a fresh run; it was removed. If a date column is wanted, assign it
# explicitly.
movie_program_df

Unnamed: 0,movie,genre,cinema,place,movie_link,showtime,age_limit,language
0,Schachnovelle,Drama,Arthouse Le Paris,Zürich,https://www.cineman.ch/en/movie/2021/Schachnov...,12:15,12Y.,G
1,Nomadland,Drama,Arthouse Piccadilly,Zürich,https://www.cineman.ch/en/movie/2020/Nomadland/,13:00,12/6Y.,E/gf
2,Bigfoot Superstar,Animation,Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2019/BigfootSu...,13:20,Reservation,G
2,Bigfoot Superstar,Animation,Kosmos,Zürich,https://www.cineman.ch/en/movie/2019/BigfootSu...,13:30,Reservation,G
3,The Croods 2,"Adventure, Animation, Children, Comedy, Fantasy",Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2017/TheCroods2/,13:20,6Y.,G
...,...,...,...,...,...,...,...,...
148,Fantoche: Best-of Fantoche 2021,,Riffraff,Zürich,https://www.cineman.ch/en/showtimes/cinema/rif...,21:00,16Y.,O
149,French Exit,Drama,Kosmos,Zürich,https://www.cineman.ch/en/movie/2020/FrenchExit/,21:00,Reservation,E/gf
150,Riders of Justice - Helden der Wahrscheinlichkeit,,Riffraff,Zürich,https://www.cineman.ch/en/movie/2020/RidersofJ...,21:00,16Y.,O/gf
151,The Organ Donor,Horror,Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2020/SawIX/,21:00,18Y.,G


In [30]:
# Refresh both columns of the ratings mapping from the scraped lists, then
# turn it into a DataFrame and preview the first rows.
ratings_dict.update(movie=title_list, rating=rating_list)
ratings_df = pd.DataFrame(data=ratings_dict)
ratings_df.head()

Unnamed: 0,movie,rating
0,Schachnovelle,not available
1,Nomadland,3.9
2,Bigfoot Superstar,1.6
3,The Croods 2,3.9
4,Heitere Fahne,not available


In [36]:
# Attach the rating to every showing. The join key is named explicitly:
# without `on`, pandas joins on every column name the two frames share, so
# adding a same-named column to either frame would silently change the join.
# how="left" keeps showings whose title has no rating entry (rating is NaN).
cineman_df = pd.merge(movie_program_df, ratings_df, how="left", on="movie")
cineman_df

Unnamed: 0,movie,genre,cinema,place,movie_link,showtime,age_limit,language,rating
0,Schachnovelle,Drama,Arthouse Le Paris,Zürich,https://www.cineman.ch/en/movie/2021/Schachnov...,12:15,12Y.,G,not available
1,Nomadland,Drama,Arthouse Piccadilly,Zürich,https://www.cineman.ch/en/movie/2020/Nomadland/,13:00,12/6Y.,E/gf,3.9
2,Bigfoot Superstar,Animation,Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2019/BigfootSu...,13:20,Reservation,G,1.6
3,Bigfoot Superstar,Animation,Kosmos,Zürich,https://www.cineman.ch/en/movie/2019/BigfootSu...,13:30,Reservation,G,1.6
4,The Croods 2,"Adventure, Animation, Children, Comedy, Fantasy",Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2017/TheCroods2/,13:20,6Y.,G,3.9
...,...,...,...,...,...,...,...,...,...
224,Fantoche: Best-of Fantoche 2021,,Riffraff,Zürich,https://www.cineman.ch/en/showtimes/cinema/rif...,21:00,16Y.,O,
225,French Exit,Drama,Kosmos,Zürich,https://www.cineman.ch/en/movie/2020/FrenchExit/,21:00,Reservation,E/gf,3.0
226,Riders of Justice - Helden der Wahrscheinlichkeit,,Riffraff,Zürich,https://www.cineman.ch/en/movie/2020/RidersofJ...,21:00,16Y.,O/gf,3.3
227,The Organ Donor,Horror,Arena Cinemas,Zürich,https://www.cineman.ch/en/movie/2020/SawIX/,21:00,18Y.,G,2.5


In [39]:
# Current local date; the bare name on the last line makes the notebook echo it.
today = date.today()
today

datetime.date(2021, 9, 19)

In [40]:
# Write the merged showtimes table to a CSV whose name starts with today's date.
showtimes_csv_path = f"{date.today()}_showtimes_zurich.csv"
cineman_df.to_csv(showtimes_csv_path)

## Location Data - Google API