In [None]:
import json
import os
import time
import warnings
from datetime import datetime, timedelta

import nltk
import numpy as np
import pandas as pd
import requests
from matplotlib import pyplot as plt
from nltk.tokenize import word_tokenize
from pymongo import MongoClient
from tqdm import tqdm

nltk.download('punkt')

In [None]:
# from Services import IGDB

class TooManyRequests(Exception):
    """Signals that the remote API answered with HTTP 429 (rate limit hit)."""

    def __init__(self) -> None:
        # The message is fixed; callers distinguish this error by its type.
        Exception.__init__(self, "Too Many Requests")
        

class IGDB:
    """Small client for the IGDB v4 HTTP API, authenticated through Twitch OAuth.

    Attributes:
        client_id: Twitch application client id, sent as the ``Client-ID`` header.
        secret_key: Twitch application client secret.
        access_token: Bearer token sent in the ``Authorization`` header.
    """

    def __init__(self, client_id, secret_key, access_token=None) -> None:
        """Store the credentials, requesting a new token when none is given.

        Args:
            client_id: Twitch application client id.
            secret_key: Twitch application client secret.
            access_token: Existing bearer token; when ``None`` a new one is
                requested via :meth:`create_access_token`.
        """
        if access_token is None:
            access_token = self.create_access_token(secret_key, client_id)
        self.client_id = client_id
        self.secret_key = secret_key
        self.access_token = access_token

    @staticmethod
    def _mask(value):
        """Return a placeholder for a secret so it never reaches notebook output."""
        return "***" if value else str(value)

    def post(self, url: str, headers: dict, queue: dict, data: str):
        """Send a POST request and return the decoded JSON body.

        Args:
            url: Endpoint URL without a query string.
            headers: HTTP headers to send.
            queue: Query-string parameters (may be empty).
            data: Request body.

        Returns:
            The response body parsed as JSON.

        Raises:
            TooManyRequests: The server answered with HTTP 429.
            Exception: The server answered with any other non-200 status.
        """
        # Let requests build the query string so that values are URL-encoded
        # and an empty mapping does not leave a dangling "?" on the URL.
        response = requests.post(url, headers=headers, params=queue, data=data)
        if response.status_code == 429:
            raise TooManyRequests()
        if response.status_code != 200:
            raise Exception(f"Error {response.status_code}\n{response.content}")
        return json.loads(response.content)

    def create_access_token(self, secret_key, client_id):
        """Request an access token with the client-credentials grant.

        Args:
            secret_key: Twitch application client secret.
            client_id: Twitch application client id.

        Returns:
            The ``access_token`` field of the token endpoint's JSON response.
        """
        url = "https://id.twitch.tv/oauth2/token"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0"
        }
        queue = {
            'client_secret': secret_key,
            'client_id': client_id,
            "grant_type": "client_credentials"
        }
        # The credentials travel in the query string; there is no request body.
        # (The URL used to be passed twice here, which made this call raise
        # TypeError before any request was sent.)
        response = self.post(url, headers=headers, queue=queue, data="")
        return response['access_token']

    def get_franchises(self, offset=0, limit=500):
        """Fetch one page of franchises together with selected game fields.

        Args:
            offset: Number of records to skip.
            limit: Maximum number of records to return in this page.

        Returns:
            The decoded JSON response of the ``/v4/franchises`` endpoint.

        Raises:
            TooManyRequests: The server answered with HTTP 429.
            Exception: The server answered with any other non-200 status.
        """
        url = 'https://api.igdb.com/v4/franchises'
        headers = {
            'Client-ID': self.client_id,
            'Authorization': f'Bearer {self.access_token}'
        }
        queue = {}
        data = f'''fields checksum,
            created_at,
            games.aggregated_rating, games.aggregated_rating_count, games.alternative_names.name, games.category, games.name,
            name,
            slug,
            updated_at,
            url;
            limit {limit};
            offset {offset};'''

        return self.post(url, headers=headers, queue=queue, data=data)

    def __str__(self):
        # Secrets are masked: this text ends up in logs and cell outputs.
        return (
            f"client_id: {self.client_id}"
            f"\n\tsecret_key: {self._mask(self.secret_key)}"
            f"\n\taccess_token: {self._mask(self.access_token)}"
        )

    def __repr__(self):
        # A bare `client` expression in a cell renders this, so mask secrets here too.
        return (
            f"client_id: {self.client_id}"
            f"\nsecret_key: {self._mask(self.secret_key)}"
            f"\naccess_token: {self._mask(self.access_token)}"
        )


In [None]:
# Credentials are read from the environment so they never live in the notebook.
# The values that used to be committed here must be treated as leaked and rotated.
client_id = os.environ.get("IGDB_CLIENT_ID")
secret_key = os.environ.get("IGDB_CLIENT_SECRET")
# Optional: when unset, IGDB() receives None and tries to create a token
# from the client id and secret.
access_token = os.environ.get("IGDB_ACCESS_TOKEN")

if not client_id or not secret_key:
    warnings.warn(
        "IGDB_CLIENT_ID and IGDB_CLIENT_SECRET must be set in the environment "
        "before the IGDB client can authenticate."
    )

In [None]:
# Build the API client from the credentials defined in the previous cell;
# the trailing bare expression renders the client's repr as the cell output.
client = IGDB(
    client_id=client_id,
    secret_key=secret_key,
    access_token=access_token,
)
client

In [None]:
# Page through the franchises endpoint until a short (or empty) page signals
# the end, instead of assuming the catalogue fits in exactly four pages.
PAGE_SIZE = 500  # same as get_franchises' default `limit`

franchises = []
offset = 0
while True:
    page = client.get_franchises(offset, PAGE_SIZE)
    franchises.extend(page)
    if len(page) < PAGE_SIZE:
        break
    offset += PAGE_SIZE

In [None]:
# Persist the raw API payload so later runs can work from disk.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
with open('/home/vadim/Projects/news_classifire/Data/igdb_franchise.json', 'w') as file:
    file.write(json.dumps(franchises))

In [None]:
# One row per franchise; rows with any missing field are discarded.
df = pd.DataFrame(data=franchises)
df = df.dropna()
df.head(n=5)

In [None]:
def _mean_aggregated_rating(games):
    """Mean `aggregated_rating` over the games that have one (0.0 if none do)."""
    ratings = [
        game['aggregated_rating']
        for game in games
        if game.get('aggregated_rating') is not None
    ]
    # Guarding on `ratings` also covers an empty games list (no ZeroDivisionError).
    return sum(ratings) / len(ratings) if ratings else 0.0


def _total_votes(games):
    """Sum of `aggregated_rating_count` over all games, counting missing values as 0."""
    return sum(game.get('aggregated_rating_count') or 0 for game in games)


df['games_count'] = df['games'].map(len)
# This column used to be computed from `aggregated_rating_count`, which made it
# "votes per game" rather than a rating.
df['average_rating'] = df['games'].map(_mean_aggregated_rating)
df['votes_count'] = df['games'].map(_total_votes)

In [None]:
# Preview the frame again, now including the derived per-franchise columns.
df.head(n=5)

In [None]:
# Keep franchises that have at least one game and at least one rating vote.
# .copy() gives df2 its own data, so adding a column below does not write
# into a slice of df (which triggers pandas' SettingWithCopyWarning).
df2 = df[(df['votes_count'] >= 1) & (df['games_count'] >= 1)].copy()
df2['games_titles'] = df2['games'].map(lambda games: [game['name'] for game in games])

In [None]:
# Project the columns that go into the cleaned export. The explicit copy makes
# df3 independent of df2, so columns added to df3 later are not written into a
# slice of another frame.
df3 = df2[['id', "name", "slug", "games_count", "average_rating", "votes_count", "games_titles"]].copy()
df3.head(10)

In [None]:
def prepare_tokens(v):
    """Lower-case a string and tokenize it with NLTK's ``word_tokenize``.

    Args:
        v: Text to process.

    Returns:
        A dict with the lower-cased text under ``"value"``, the token list
        under ``"tokens"`` and the number of tokens under ``"l"``.
    """
    lowered = v.lower()
    words = word_tokenize(lowered)
    return dict(value=lowered, tokens=words, l=len(words))

In [None]:
# assign() returns a new frame, so the token columns are never written into a
# slice of df2 (the in-place column assignment raised SettingWithCopyWarning).
df3 = df3.assign(
    franchise_token=df3['name'].map(prepare_tokens),
    games_titles_tokens=df3['games_titles'].map(
        lambda titles: [prepare_tokens(title) for title in titles]
    ),
)

In [None]:
# Export the cleaned franchises as a JSON array of row records.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
with open("/home/vadim/Projects/news_classifire/Data/clean_franchise.json", 'w') as f:
    f.write(df3.to_json(orient='records'))

In [None]:
# x: distinct games-per-franchise values; y: number of franchises with each value.
x, y = np.unique(df2['games_count'].to_numpy(), return_counts=True)

In [None]:
# Show the distinct games-per-franchise values found in df2.
x

In [None]:
# Stem plot of how many franchises have each number of games.
fig, ax = plt.subplots()

ax.stem(x, y)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel("Games per franchise")
ax.set_ylabel("Number of franchises")
ax.set_title("Distribution of franchise sizes")
plt.show()