# JSON file Cleaning

In [106]:
import pandas as pd
import json

# Load broadcaster data.
# The encoding is passed explicitly so that accented names (e.g. "Ligue des
# Champions féminine") decode identically on every platform instead of
# depending on the locale's default text encoding.
with open('broadcaster.json', encoding='utf-8') as file:
    broadcaster_data = json.load(file)

# Load prices data (explicit encoding for the same reason).
with open('prices.json', encoding='utf-8') as file:
    prices_data = json.load(file)

df_broadcaster = pd.DataFrame(broadcaster_data)
df_prices = pd.DataFrame(prices_data)

# Keep the second ':'-separated field of the raw 'sport' value, then trim
# the whitespace around it.
df_broadcaster['sport'] = df_broadcaster['sport'].str.split(":", expand=True)[1]
df_broadcaster['sport'] = df_broadcaster['sport'].str.strip()


def clean_broadcasters(broadcasters):
    """Return broadcaster names with newlines and surrounding whitespace removed.

    Parameters
    ----------
    broadcasters : list of str, str, or any other value
        A list of broadcaster names, or a single name given as a bare string.

    Returns
    -------
    list of str
        One cleaned name per input name, in the original order. A bare string
        yields a one-element list. Any other non-list value (e.g. ``None``)
        yields an empty list.
    """
    if isinstance(broadcasters, str):
        # A lone name is a valid one-broadcaster entry; wrap it instead of
        # silently discarding it.
        broadcasters = [broadcasters]
    if not isinstance(broadcasters, list):
        return []
    return [name.replace('\n', '').strip() for name in broadcasters]

# Use the same key column name ('broadcaster') in the price table as in the
# broadcaster table.
df_prices = df_prices.rename(columns={"channel": "broadcaster"})

# Clean every broadcaster list, then expand list-valued 'Competition' cells
# into one row per competition (the original index label is repeated).
df_broadcaster = (
    df_broadcaster
    .assign(broadcaster=lambda frame: frame['broadcaster'].apply(clean_broadcasters))
    .explode('Competition')
)
df_broadcaster

Unnamed: 0,sport,Competition,broadcaster
0,football,Ligue des Champions,"[Canal+, beIN SPORTS]"
1,football,Ligue des Champions féminine,"[DAZN, YouTube]"
2,football,Europa League,"[Canal+, RMC Sport]"
3,football,Ligue 1,"[Amazon, Canal+]"
4,football,Ligue 2,"[Amazon, beIN SPORTS]"
...,...,...,...
62,Natation,FFN Golden Tour,[beIN SPORTS]
63,Natation,International Swimming League,[beIN SPORTS]
64,automoto,Formule 1,[Canal+]
64,automoto,Moto GP,[Canal+]


# User input

In [37]:
from IPython.display import display
from ipywidgets import widgets

In [67]:
# Dropdown choices: the 'None' placeholder first, then every distinct sport
# in order of first appearance.
valid_sports = ['None']
valid_sports.extend(df_broadcaster['sport'].unique().tolist())

In [99]:
# Build five sport dropdowns (labelled "Sport 1:" .. "Sport 5:"), each
# offering the same list of sports.
sport_widgets = []
for slot in range(5):
    sport_widgets.append(
        widgets.Dropdown(options=valid_sports, description=f'Sport {slot+1}:')
    )

# Render the dropdowns in order.
for sport_widget in sport_widgets:
    display(sport_widget)

Dropdown(description='Sport 1:', options=('None', 'football', 'Basketball', 'Tennis', 'Rugby', 'Handball', 'Vo…

Dropdown(description='Sport 2:', options=('None', 'football', 'Basketball', 'Tennis', 'Rugby', 'Handball', 'Vo…

Dropdown(description='Sport 3:', options=('None', 'football', 'Basketball', 'Tennis', 'Rugby', 'Handball', 'Vo…

Dropdown(description='Sport 4:', options=('None', 'football', 'Basketball', 'Tennis', 'Rugby', 'Handball', 'Vo…

Dropdown(description='Sport 5:', options=('None', 'football', 'Basketball', 'Tennis', 'Rugby', 'Handball', 'Vo…

In [101]:
# One competition dropdown per sport dropdown. The options are taken from the
# sport selected at the moment this cell runs; re-run the cell after changing
# a sport selection.
competition_widgets = []
for slot, sport_widget in enumerate(sport_widgets):
    chosen_sport = sport_widget.value
    if chosen_sport != 'None':
        # Distinct competitions listed for the chosen sport.
        same_sport = df_broadcaster['sport'] == chosen_sport
        choices = df_broadcaster[same_sport]['Competition'].unique().tolist()
    else:
        # No sport picked: only the placeholder is offered.
        choices = ['None']
    competition_widgets.append(
        widgets.Dropdown(options=choices, description=f'Competition {slot+1}:')
    )

for competition_widget in competition_widgets:
    display(competition_widget)

Dropdown(description='Competition 1:', options=('Ligue des Champions', 'Ligue des Champions féminine', 'Europa…

Dropdown(description='Competition 2:', options=('Ligue des Champions', 'Ligue des Champions féminine', 'Europa…

Dropdown(description='Competition 3:', options=('NBA', 'Betclic ELITE', 'PRO B', 'Match des Équipes de France …

Dropdown(description='Competition 4:', options=('None',), value='None')

Dropdown(description='Competition 5:', options=('None',), value='None')

In [102]:
# Snapshot the current selections as [sport, competition] pairs, in dropdown order.
user_input = [
    list(pair)
    for pair in zip(
        (dropdown.value for dropdown in sport_widgets),
        (dropdown.value for dropdown in competition_widgets),
    )
]

In [103]:
user_input

[['football', 'Ligue des Champions'],
 ['football', 'Europa League'],
 ['Basketball', 'NBA'],
 ['None', 'None'],
 ['None', 'None']]

# Recommendation logic

In [110]:
# Ensure every 'broadcaster' cell is a list, then expand to one row per
# (competition, broadcaster) pair. Exploding an empty list leaves NaN in the
# 'broadcaster' column.
df_broadcaster['broadcaster'] = df_broadcaster['broadcaster'].apply(lambda x: x if isinstance(x, list) else [x])
df_broadcaster = df_broadcaster.explode('broadcaster')

# The i-th choice (0-based) is worth max_choice_weight - i points.
max_choice_weight = 5
# The most expensive broadcaster loses this many points; cheaper ones lose
# proportionally fewer.
max_price_penalty = 5

# Only broadcasters present in the price table are scored.
broadcaster_scores = {broadcaster: 0 for broadcaster in df_prices['broadcaster'].unique()}

relevant_broadcasters = set()

for i, (sport, competition) in enumerate(user_input):
    weight = max_choice_weight - i
    competition_broadcasters = set(
        df_broadcaster[(df_broadcaster['sport'] == sport) & (df_broadcaster['Competition'] == competition)]['broadcaster'].unique()
    )

    # Iterate over the scored broadcasters rather than over the competition's
    # broadcasters: a broadcaster without a price row (or a NaN left by
    # explode) has no score entry and previously raised KeyError here.
    for broadcaster in broadcaster_scores:
        if broadcaster in competition_broadcasters:
            broadcaster_scores[broadcaster] += weight
            relevant_broadcasters.add(broadcaster)
        else:
            # Not showing this choice costs as much as showing it earns.
            broadcaster_scores[broadcaster] -= weight

# Subtract the proportional price from each broadcaster's score
max_price = df_prices['price'].max()
for broadcaster in broadcaster_scores:
    # First price row found for this broadcaster.
    price = df_prices[df_prices['broadcaster'] == broadcaster]['price'].values[0]
    broadcaster_scores[broadcaster] -= price / max_price * max_price_penalty

# Keep only broadcasters that show at least one of the selected competitions.
# Scores are NOT required to be positive: a relevant broadcaster with a
# negative score is still recommended.
filtered_scores = {broadcaster: score for broadcaster, score in broadcaster_scores.items() if broadcaster in relevant_broadcasters}

# Sort the filtered dictionary by score in descending order
recommendations = sorted(filtered_scores.items(), key=lambda item: item[1], reverse=True)


for rank, (broadcaster, score) in enumerate(recommendations, start=1):
    print(f"Recommendation {rank}: {broadcaster} with a score of {score}")


Recommendation 1: beIN SPORTS with a score of -1.679528403001072
Recommendation 2: Canal+ with a score of -2.0
Recommendation 3: RMC Sport with a score of -10.394069310468025
