## Scraping [1/2]

In [6]:
import requests
from bs4 import BeautifulSoup
import csv


def strToInt(str):
  """Convert a value to an int, falling back to 0 when it cannot be parsed.

  Args:
    str: Value handed to ``int``. The parameter name shadows the builtin
      but is kept so existing keyword callers keep working.

  Returns:
    ``int(str)``, or 0 when ``int`` raises ``TypeError`` or ``ValueError``.
  """
  try:
    return int(str)
  except (TypeError, ValueError):
    # Only conversion failures map to 0; anything else (KeyboardInterrupt,
    # SystemExit, ...) propagates instead of being silently swallowed.
    return 0


def elementToObject(element, date):
  """Build an episode dict from one calendar episode element.

  Args:
    element: Parsed element exposing ``find_all`` and
      ``find_previous_sibling`` (a BeautifulSoup tag in this notebook).
      Its first link supplies the name and URL; the text of its second
      link is split on '.' into season and episode.
    date: Value stored unchanged under the 'date' key.

  Returns:
    dict with the keys name, episode, season, date, country, channel, url
    (in that order). Season/episode are 0 when they cannot be parsed;
    country/channel are None when the matching <img> sibling is missing.

  Raises:
    IndexError: If the element contains no link at all.
  """
  links = element.find_all('a')
  # Pad the split so a missing second link or a label without a '.'
  # yields 0 (via strToInt) instead of an IndexError.
  label = links[1].text if len(links) > 1 else ''
  seasonAndEpisode = label.split('.') + ['', '']
  # The nearest preceding <img> is read as the channel, the one before it
  # as the country.
  channelElement = element.find_previous_sibling('img')
  countryElement = None
  if channelElement is not None:
    countryElement = channelElement.find_previous_sibling('img')
  episode = {
    'name': links[0].text,
    'episode': strToInt(seasonAndEpisode[1]),
    'season': strToInt(seasonAndEpisode[0]),
    'date': date,
    'country': countryElement.get('alt') if countryElement is not None else None,
    'channel': channelElement.get('alt') if channelElement is not None else None,
    'url': links[0].get('href')
  }
  return episode


# Download the calendar page and parse it.
url = 'https://www.spin-off.fr/calendrier_des_series.html'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of scraping an error page.
response.raise_for_status()
content = response.content
# NOTE(review): 'html' lets Beautiful Soup pick whichever HTML parser is
# installed, so the parsed tree may differ between environments - consider
# naming one parser explicitly.
page = BeautifulSoup(content, 'html')
# Day cells of the calendar: regular days plus the cell marked as current.
currentMonth = page.find_all('td', class_=['floatleftmobile td_jour', 'floatleftmobile td_jour td_jourcourant'])


# Walk every day cell and gather one dict per episode listed in it.
episodes = []
for day in currentMonth:
  dateElement = day.find('div', class_=['div_jour', 'div_jourcourant fond_degrade_v4'])
  if not dateElement:
    # No day header found in this cell: nothing to collect.
    continue
  # The date is the second '_'-separated piece of the header's id.
  date = dateElement.get('id').split('_')[1]
  episodesElements = day.find_all('span', class_=['calendrier_episodes'])
  episodes.extend(
    elementToObject(episodeElement, date)
    for episodeElement in episodesElements
  )

# print(episodes)

## Fichiers

In [7]:
def objectToStringforCsv(episode):
  """Serialize one episode dict as a single CSV line.

  Args:
    episode: Mapping with the keys name, episode, season, date, country,
      channel and url.

  Returns:
    The seven values joined by commas and terminated by a newline. Values
    containing a comma, a double quote or a line break are quoted by the
    csv module so the row stays parseable.

  Raises:
    KeyError: If one of the seven keys is missing.
  """
  # Local imports keep the helper self-contained whichever cells were run.
  import csv
  import io

  fields = ('name', 'episode', 'season', 'date', 'country', 'channel', 'url')
  buffer = io.StringIO()
  # str() mirrors the previous f-string rendering (None is written as
  # 'None'); going through csv.writer also avoids nesting same-type quotes
  # inside an f-string, which is a SyntaxError before Python 3.12.
  csv.writer(buffer, lineterminator='\n').writerow(
    [str(episode[field]) for field in fields]
  )
  return buffer.getvalue()

# Render every scraped episode as one CSV line.
strEpisodes = list(map(objectToStringforCsv, episodes))

# Overwrite the CSV file with all the lines, in order.
with open('./data/files/episodes.csv', mode='w+', encoding='utf-8') as file:
  file.writelines(strEpisodes)

In [8]:
def lineToTuples(line):
  """Debug helper: print the comma-separated pieces of ``line``.

  Nothing is returned; the list of pieces is only printed.
  """
  print(line.split(','))

def arrayToEpisode(values):
  """Turn one row of CSV fields into an episode dict.

  Args:
    values: Indexable row laid out as name, episode, season, date,
      country, channel, url. Items past the seventh are ignored.

  Returns:
    dict with those seven keys in that order; 'episode' and 'season' are
    converted with ``int``, the other values are stored as given.

  Raises:
    IndexError: If the row has fewer than seven items.
    ValueError: If the episode or season field is not an integer literal.
  """
  def asIs(raw):
    return raw

  # (key, converter) pairs listed in the same order as the row's columns.
  layout = (
    ('name', asIs),
    ('episode', int),
    ('season', int),
    ('date', asIs),
    ('country', asIs),
    ('channel', asIs),
    ('url', asIs),
  )
  return {
    key: convert(values[position])
    for position, (key, convert) in enumerate(layout)
  }

# Reload the episodes from the CSV file and record, for every row, the
# type name each field would have once integer-looking values are converted.
typeTuples = []
episodes = []
# newline='' hands line-ending handling to the csv module, which also
# understands quoted fields (e.g. a name that contains a comma).
with open('./data/files/episodes.csv', 'r', encoding='utf-8', newline='') as file:
  for values in csv.reader(file):
    if not values:
      # Blank line: there is no episode to rebuild.
      continue
    episodes.append(arrayToEpisode(values))
    types = []
    for value in values:
      try:
        value = int(value)
      except ValueError:
        # Not an integer literal: keep the original string.
        pass
      types.append(type(value).__name__)
    typeTuples.append(tuple(types))

## SQL [1/2]

In [11]:
import sqlite3

# Insert every episode into the Episode table.
# NOTE(review): re-running this cell inserts the same rows again - confirm
# whether duplicates are acceptable.
db = sqlite3.connect('./data/databases/database.db')
try:
  cursor = db.cursor()

  # Pull the fields by name so each tuple follows the column list of the
  # statement instead of depending on dict insertion order.
  columns = ('name', 'episode', 'season', 'date', 'country', 'channel', 'url')
  tuples = [
    tuple(episode[column] for column in columns)
    for episode in episodes
  ]

  req = 'insert into Episode (name, episode, season, date, country, channel, url) values (?, ?, ?, ?, ?, ?, ?)'
  cursor.executemany(req, tuples)
  db.commit()
finally:
  # Always release the connection, even when the insert fails.
  db.close()

## Algorithmie [1/2]

In [4]:
import sqlite3

# Count the stored episodes per channel, busiest channel first.
req = '''
  select channel, count(*) as length
  from Episode
  group by channel
  order by length desc
'''

db = sqlite3.connect('./data/databases/database.db')
try:
  cursor = db.cursor()
  # Read-only query: nothing to commit afterwards.
  res = cursor.execute(req).fetchall()
finally:
  # Always release the connection, even when the query fails.
  db.close()

print(res)

[('Netflix', 75), ('Disney+', 33), ('Apple TV+', 18), ('Paramount+', 15), ('Global', 13), ('TF1', 12), ('NBC', 12), ('HBO Max', 12), ('Hulu', 11), ('Prime Video', 10), ('Syfy', 9), ('Fox', 9), ('CraveTV', 7), ('ZDF', 6), ('Starz', 6), ('Channel 5', 6), ('CTV', 6), ('CBC', 6), ('AMC', 6), ('TV4', 5), ('SundanceTV', 4), ('Showtime', 4), ('HBO', 4), ('Epix', 4), ('BET+', 4), ('BBC One', 4), ('Amazon Freevee', 4), ('Adult Swim', 4), ('ALLBLK', 4), ('TMC', 3), ('Showtime on Demand', 3), ('Rai 1', 3), ('OWN', 3), ('Nickelodeon', 3), ('Moviestar+', 3), ('France 2', 3), ('FX', 3), ('CBC Gem', 3), ('BET', 3), ('TVE', 2), ('OCS', 2), ('Citytv', 2), ('ATRESplayer', 2), ('Één', 1), ('Sky1', 1), ('Puhu TV', 1), ('Paramount Network', 1), ('ITVX', 1), ('France 3', 1)]


## Scraping [2/2]

## SQL [2/2]

## Algorithmie [2/2]

## Orchestration