In [1]:
from SportsScrapper import BCCI_Scrapper, ICC_Scrapper, Indian_Athletes_Scrapper

def search_official(query, player_type, player_platform, type):
    """Fetch player data from one official source, or from all of them.

    Args:
        query: Player name forwarded to the scraper(s).
        player_type: Passed to ``BCCI_Scrapper.get_player_data`` as its third
            argument; the other scrapers do not receive it.
        player_platform: Passed to ``BCCI_Scrapper.get_player_data`` as its
            second argument; the other scrapers do not receive it.
        type: Source selector: ``'bcci'``, ``'icc'`` or ``'indian_athletes'``.
            Any other value queries all three sources.

    Returns:
        Whatever the selected scraper's ``get_player_data`` returns, or -- for
        an unrecognised ``type`` -- a list holding the BCCI, ICC and
        Indian-athletes results in that order.
    """
    def fetch_bcci():
        # Note the argument order: platform comes before player type.
        return BCCI_Scrapper().get_player_data(query, player_platform, player_type)

    def fetch_icc():
        return ICC_Scrapper().get_player_data(query)

    def fetch_indian_athletes():
        return Indian_Athletes_Scrapper().get_player_data(query)

    sources = (
        ('bcci', fetch_bcci),
        ('icc', fetch_icc),
        ('indian_athletes', fetch_indian_athletes),
    )
    for source_name, fetch in sources:
        if type == source_name:
            return fetch()

    # Unknown selector: query every source, preserving the order above.
    return [fetch() for _, fetch in sources]


In [2]:
# Smoke test: query only the BCCI source for men's international content.
print(search_official(query='virat kohli', player_type='men',
                      player_platform='international', type='bcci'))

{'Response': [{'title': 'Terrific Running Catch ft. Virat Kohli', 'date': '17th Jan, 2024', 'views': 20300, 'platform': 'international', 'type': 'men', 'player_name': 'virat kohli', 'image_url': 'https://bcciplayerimages.s3.ap-south-1.amazonaws.com/resizedimageskirti/3588749423001/379f20f7-9221-4dd7-892e-84fdbe579024/2d8f8913-98dc-473b-a5ec-767bfdf1ddc7/1280x720/match/image_compress.jpeg', 'link': 'https://www.bcci.tv/bccilink/videos/3sb2iQND', 'sport': 'Cricket'}, {'title': 'IND vs AFG 2024, 3RD T20I: Virat Kohli Wicket', 'date': '17th Jan, 2024', 'views': 5500, 'platform': 'international', 'type': 'men', 'player_name': 'virat kohli', 'image_url': 'https://bcciplayerimages.s3.ap-south-1.amazonaws.com/resizedimageskirti/3588749423001/24ee868a-1376-4441-a432-30bd036586a4/4c00e264-d9ba-4762-a725-9f1c9216c27f/1280x720/match/image_compress.jpeg', 'link': 'https://www.bcci.tv/bccilink/videos/jmBT81fx', 'sport': 'Cricket'}, {'title': '4 x 4: Placed to perfection ft. Virat Kohli & Yashasvi Ja

In [3]:
import requests

def search_unofficial(query, api_key=None, timeout=10):
    """Fetch news articles about ``query`` from NewsAPI's ``/v2/everything``.

    Args:
        query: Free-text search term. It is lower-cased and its spaces are
            replaced by hyphens before being sent as the ``q`` parameter.
        api_key: NewsAPI key. When omitted, the ``NEWS_API_KEY`` environment
            variable is used so the secret is never stored in the notebook.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``articles`` list from the JSON response.

    Raises:
        RuntimeError: If no API key is available, or if the response carries
            no ``articles`` entry (for example an API error payload).
    """
    import os  # local import so this cell does not depend on another cell's imports

    if api_key is None:
        api_key = os.environ.get('NEWS_API_KEY')
    if not api_key:
        raise RuntimeError(
            'No NewsAPI key available: pass api_key or set the '
            'NEWS_API_KEY environment variable')

    url = 'https://newsapi.org/v2/everything?'
    parameters = {
        'q': query.lower().replace(' ', '-'),
        'apiKey': api_key,
        'sources': 'the-times-of-india,the-hindu,hindustan-times,the-indian-express,news18,ndtv,india-today,zee-news,abp-news,india-tv,republic-world,the-quint,the-wire,scroll,the-print',
    }
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, params=parameters, timeout=timeout)
    data = response.json()
    if not isinstance(data, dict) or 'articles' not in data:
        # Surface the API's own diagnostics instead of a bare KeyError.
        code = data.get('code') if isinstance(data, dict) else None
        message = data.get('message') if isinstance(data, dict) else data
        raise RuntimeError(f'NewsAPI request failed ({code}): {message}')
    return data['articles']

In [4]:
# Smoke test: fetch the raw news articles for one player.
print(search_unofficial(query='virat kohli'))

[{'source': {'id': 'the-times-of-india', 'name': 'The Times of India'}, 'author': 'PTI', 'title': 'Impact Player rule has disrupted balance of game: Virat Kohli', 'description': "India's Virat Kohli has criticised the Impact Player substitution rule, which he believes disrupts the balance of the game. The mid-innings substitution rule, adopted in the previous edition of the IPL, has sparked a row with India skipper Rohit Sharma. Kohli…", 'url': 'https://economictimes.indiatimes.com/news/sports/impact-player-rule-has-disrupted-balance-of-game-virat-kohli/articleshow/110228547.cms', 'urlToImage': 'https://img.etimg.com/thumb/msid-110228702,width-1200,height-630,imgsize-38456,overlay-economictimes/photo.jpg', 'publishedAt': '2024-05-18T08:26:11Z', 'content': 'Echoing India skipper Rohit Sharma\'s sentiments, star batter Virat Kohli has criticised the Impact Player substitution rule and said it is "disrupting the balance" of the game. The mid-innings substi… [+2201 chars]'}, {'source': {'i

In [5]:
def search(query, player_type, player_platform, type):
    """Combine official scraper data with annotated news articles.

    The news articles returned by ``search_unofficial`` are annotated in place
    with a truth score, influence flags, sentiment fields and relevance
    scores. The helpers ``assess_truth``, ``detect_influence``,
    ``cluster_articles``, ``sentiment_analysis`` and ``relevance_score`` must
    be defined before this function is called.

    Args:
        query: Player name used for both the official and the news lookup.
        player_type: Forwarded to ``search_official``.
        player_platform: Forwarded to ``search_official``.
        type: Official source selector, forwarded to ``search_official``.

    Returns:
        A dict with the keys ``'official_data'``, ``'unofficial_data'`` (the
        annotated articles) and ``'clusters'``.
    """
    official = search_official(query, player_type, player_platform, type)
    articles = search_unofficial(query)

    truths = assess_truth(articles, official)
    influence_pairs = [detect_influence(item) for item in articles]
    clusters = cluster_articles(articles)
    sentiment_reports = [sentiment_analysis(item) for item in articles]
    relevance_reports = [relevance_score(item, query) for item in articles]

    # (article key, sentiment report key) pairs copied onto every article.
    sentiment_fields = (
        ('sentiment_polarity', 'polarity'),
        ('sentiment_polarity_label', 'polarity_label'),
        ('sentiment_subjectivity', 'subjectivity'),
        ('sentiment_subjectivity_label', 'subjectivity_label'),
    )

    for position, article in enumerate(articles):
        article['truth_value'] = truths[position]
        political, emotional = influence_pairs[position]
        article['political_influence'] = political
        article['emotional_influence'] = emotional
        for target_key, report_key in sentiment_fields:
            article[target_key] = sentiment_reports[position][report_key]
        article['relevance_score_tfidf'] = relevance_reports[position]['tfidf_similarity']
        article['relevance_score_bert'] = relevance_reports[position]['bert_similarity']

    return {
        'official_data': official,
        'unofficial_data': articles,
        'clusters': clusters,
    }

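# Example usage (player_type / player_platform use the values the BCCI scraper is called with in this notebook)
# result = search("Virat Kohli", "men", "international", "bcci")
# print(result)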


In [6]:
# End-to-end run: official BCCI data plus annotated news articles.
print(search(query='Virat Kohli', player_type='men',
             player_platform='international', type='bcci'))

{'official_data': {'Response': [{'title': 'Terrific Running Catch ft. Virat Kohli', 'date': '17th Jan, 2024', 'views': 20300, 'platform': 'international', 'type': 'men', 'player_name': 'Virat Kohli', 'image_url': 'https://bcciplayerimages.s3.ap-south-1.amazonaws.com/resizedimageskirti/3588749423001/379f20f7-9221-4dd7-892e-84fdbe579024/2d8f8913-98dc-473b-a5ec-767bfdf1ddc7/1280x720/match/image_compress.jpeg', 'link': 'https://www.bcci.tv/bccilink/videos/3sb2iQND', 'sport': 'Cricket'}, {'title': 'IND vs AFG 2024, 3RD T20I: Virat Kohli Wicket', 'date': '17th Jan, 2024', 'views': 5500, 'platform': 'international', 'type': 'men', 'player_name': 'Virat Kohli', 'image_url': 'https://bcciplayerimages.s3.ap-south-1.amazonaws.com/resizedimageskirti/3588749423001/24ee868a-1376-4441-a432-30bd036586a4/4c00e264-d9ba-4762-a725-9f1c9216c27f/1280x720/match/image_compress.jpeg', 'link': 'https://www.bcci.tv/bccilink/videos/jmBT81fx', 'sport': 'Cricket'}, {'title': '4 x 4: Placed to perfection ft. Virat K

In [None]:
from SportsScrapper import BCCI_Scrapper, ICC_Scrapper, Indian_Athletes_Scrapper
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel
from sklearn.cluster import KMeans
from textblob import TextBlob
import numpy as np

# Module-level models shared by the helper functions below. They are created
# once at import/cell-execution time.

# Sentiment pipeline used by detect_influence. No checkpoint is named here, so
# whichever model the transformers library selects for this task is used.
sentiment_pipeline = pipeline('sentiment-analysis')
# Text-classification pipeline used by detect_influence for the political flag.
# NOTE(review): detect_influence looks for a 'POLITICS' label in this model's
# output, but the checkpoint name suggests an MNLI model -- confirm it can
# actually emit that label.
political_influence_model = pipeline(
    'text-classification', model='typeform/distilbert-base-uncased-mnli')
# Tokenizer / encoder pair read by bert_embedding.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')


def bert_embedding(text):
    """Embed ``text`` by averaging BERT's last hidden state over ``dim=1``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: Input passed straight to ``tokenizer``; it is truncated to at
            most 512 tokens.

    Returns:
        A NumPy array holding ``last_hidden_state.mean(dim=1)``.
    """
    import torch  # local import: the tokenizer is already asked for PyTorch tensors

    inputs = tokenizer(text, return_tensors='pt',
                       max_length=512, truncation=True, padding=True)
    # Inference only: disable autograd so no computation graph is built or
    # kept alive for every embedded text.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): the mean runs over every position, so for batched input of
    # unequal lengths padding positions would be averaged in as well.
    return outputs.last_hidden_state.mean(dim=1).detach().numpy()

def search_official(query, player_type, player_platform, type):
    """Look a player up on the official source named by ``type``.

    Args:
        query: Player name handed to the scraper(s).
        player_type: Third argument of ``BCCI_Scrapper.get_player_data``;
            not passed to the other scrapers.
        player_platform: Second argument of ``BCCI_Scrapper.get_player_data``;
            not passed to the other scrapers.
        type: ``'bcci'``, ``'icc'`` or ``'indian_athletes'`` to query a single
            source; any other value queries all three.

    Returns:
        The selected scraper's ``get_player_data`` result, or a three-element
        list (BCCI, ICC, Indian athletes) when ``type`` matches none of the
        known sources.
    """
    if type == 'bcci':
        return BCCI_Scrapper().get_player_data(query, player_platform, player_type)
    if type == 'icc':
        return ICC_Scrapper().get_player_data(query)
    if type == 'indian_athletes':
        return Indian_Athletes_Scrapper().get_player_data(query)

    # Fallback: gather every source; the list literal evaluates top to bottom,
    # so the scrapers are created and queried in this exact order.
    return [
        BCCI_Scrapper().get_player_data(query, player_platform, player_type),
        ICC_Scrapper().get_player_data(query),
        Indian_Athletes_Scrapper().get_player_data(query),
    ]


def search_unofficial(query, api_key=None, timeout=10):
    """Fetch news articles about ``query`` from NewsAPI's ``/v2/everything``.

    Args:
        query: Free-text search term. It is lower-cased and its spaces are
            replaced by hyphens before being sent as the ``q`` parameter.
        api_key: NewsAPI key. When omitted, the ``NEWS_API_KEY`` environment
            variable is used instead of a placeholder baked into the code.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``articles`` list from the JSON response.

    Raises:
        RuntimeError: If no API key is available, or if the response carries
            no ``articles`` entry (for example an API error payload).
    """
    import os  # local import: only this function needs the environment

    if api_key is None:
        api_key = os.environ.get('NEWS_API_KEY')
    if not api_key:
        raise RuntimeError(
            'No NewsAPI key available: pass api_key or set the '
            'NEWS_API_KEY environment variable')

    url = 'https://newsapi.org/v2/everything?'
    parameters = {
        'q': query.lower().replace(' ', '-'),
        'apiKey': api_key,
        'sources': 'the-times-of-india,the-hindu,hindustan-times,the-indian-express,news18,ndtv,india-today,zee-news,abp-news,india-tv,republic-world,the-quint,the-wire,scroll,the-print',
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=parameters, timeout=timeout)
    data = response.json()
    if not isinstance(data, dict) or 'articles' not in data:
        # Surface the API's own diagnostics instead of a bare KeyError.
        code = data.get('code') if isinstance(data, dict) else None
        message = data.get('message') if isinstance(data, dict) else data
        raise RuntimeError(f'NewsAPI request failed ({code}): {message}')
    return data['articles']


def assess_truth(unofficial_data, official_data):
    """Score each news article by its best TF-IDF match with official data.

    For every article, its title/description/content text is compared with
    the ``title`` + ``player_name`` text of every official record; the highest
    cosine similarity becomes the article's truth value.

    Args:
        unofficial_data: Iterable of article dicts; the ``title``,
            ``description`` and ``content`` entries are used when present.
        official_data: Result of ``search_official``. A ``{'Response': [...]}``
            payload, a list of such payloads, or a plain list of record dicts
            are all accepted.
            NOTE(review): only the ``{'Response': [...]}`` shape is visible in
            this notebook's output -- confirm the ICC / Indian-athletes
            scrapers return something compatible.

    Returns:
        A list with one similarity score per article, in input order. The
        score is ``0.0`` when there is nothing to compare against.
    """
    def official_records(payload):
        # Unwrap {'Response': [...]} payloads and flatten lists of payloads
        # into a flat list of record dicts. Iterating the payload dict itself
        # would yield its string keys, not the records.
        if isinstance(payload, dict):
            if 'Response' in payload:
                return official_records(payload['Response'])
            return [payload]
        if isinstance(payload, (list, tuple)):
            flattened = []
            for item in payload:
                flattened.extend(official_records(item))
            return flattened
        return []

    def joined(*parts):
        # Skip missing/None fields so the literal word "None" never enters the text.
        return ' '.join(str(part) for part in parts if part)

    # The official texts do not depend on the article, so build them once.
    official_texts = [
        text for text in (
            joined(record.get('title'), record.get('player_name'))
            for record in official_records(official_data))
        if text
    ]

    vectorizer = TfidfVectorizer()
    truth_values = []
    for article in unofficial_data:
        unofficial_text = joined(
            article.get('title'), article.get('description'), article.get('content'))
        best_similarity = 0.0  # also the answer when no comparison is possible
        if unofficial_text:
            for official_text in official_texts:
                # Fit on the pair only, so weights reflect just these two texts.
                vectors = vectorizer.fit_transform([official_text, unofficial_text])
                similarity = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]
                best_similarity = max(best_similarity, similarity)
        truth_values.append(best_similarity)
    return truth_values


def detect_influence(article):
    """Flag political and emotional influence in an article's content.

    Uses the module-level ``sentiment_pipeline`` and
    ``political_influence_model``.

    Args:
        article: Article dict; only its ``content`` entry is inspected.

    Returns:
        A ``(political_influence, emotional_influence)`` tuple of booleans.
        Both are ``False`` when the article has no content to analyse.
    """
    content = article.get('content')
    if not content:
        # News feeds can deliver articles without a body; the pipelines cannot
        # process None, so report "no influence" instead of crashing.
        return False, False

    sentiment_result = sentiment_pipeline(content)
    # NOTE(review): if the sentiment model only ever emits NEGATIVE/POSITIVE,
    # this flag is always True -- consider thresholding on the score instead.
    emotional_influence = sentiment_result[0]['label'] in ('NEGATIVE', 'POSITIVE')

    political_result = political_influence_model(content)
    # NOTE(review): this only fires if the classifier can emit a 'POLITICS'
    # label -- confirm against the configured checkpoint's label set.
    political_influence = any(
        prediction['label'] == 'POLITICS' and prediction['score'] > 0.5
        for prediction in political_result)
    return political_influence, emotional_influence


def cluster_articles(articles, n_clusters=5):
    """Group articles into K-Means clusters of their TF-IDF content vectors.

    Args:
        articles: List of article dicts; the ``content`` entry is clustered
            (a missing or ``None`` content is treated as empty text).
        n_clusters: Desired number of clusters. It is capped at the number of
            articles because K-Means cannot form more clusters than samples.

    Returns:
        A dict mapping each cluster index to the list of articles assigned to
        it. With at least ``n_clusters`` articles the keys are
        ``0 .. n_clusters - 1``; with no articles the dict is empty.
    """
    if not articles:
        return {}

    contents = [article.get('content') or '' for article in articles]
    vectorizer = TfidfVectorizer(stop_words='english')
    features = vectorizer.fit_transform(contents)

    # Fewer articles than requested clusters would make KMeans.fit raise.
    effective_k = max(1, min(n_clusters, len(articles)))
    # Named `kmeans` so it does not shadow the module-level BERT `model`.
    kmeans = KMeans(n_clusters=effective_k, random_state=42)
    kmeans.fit(features)

    cluster_dict = {index: [] for index in range(effective_k)}
    for article, label in zip(articles, kmeans.labels_):
        # int() turns NumPy integer labels into plain dict keys.
        cluster_dict[int(label)].append(article)
    return cluster_dict


def sentiment_analysis(article):
    """Compute TextBlob polarity and subjectivity for an article's content.

    Args:
        article: Article dict; only its ``content`` entry is analysed. A
            missing or ``None`` content is treated as empty text.

    Returns:
        A dict with:
            ``polarity``: TextBlob polarity score.
            ``polarity_label``: ``'Positive'`` (> 0), ``'Negative'`` (< 0) or
                ``'Neutral'`` (== 0).
            ``subjectivity``: TextBlob subjectivity score.
            ``subjectivity_label``: ``'Subjective'`` (>= 0.5) or
                ``'Objective'``.
    """
    # TextBlob only accepts strings, and news feeds may deliver a null body.
    text = article.get('content') or ''
    sentiment = TextBlob(text).sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity

    if polarity > 0:
        polarity_label = 'Positive'
    elif polarity < 0:
        polarity_label = 'Negative'
    else:
        polarity_label = 'Neutral'

    subjectivity_label = 'Subjective' if subjectivity >= 0.5 else 'Objective'

    return {
        'polarity': polarity,
        'polarity_label': polarity_label,
        'subjectivity': subjectivity,
        'subjectivity_label': subjectivity_label,
    }


def relevance_score(article, query):
    """Measure how closely an article's text matches the search query.

    The article's title, description and content are joined into one text,
    which is compared with ``query`` twice: once with TF-IDF vectors and once
    with ``bert_embedding`` vectors, both via cosine similarity.

    Args:
        article: Article dict; the ``title``, ``description`` and ``content``
            entries are used when present.
        query: The search string to compare against.

    Returns:
        A dict with the keys ``'tfidf_similarity'`` and ``'bert_similarity'``.
    """
    fields = (article.get('title'), article.get('description'), article.get('content'))
    # Drop missing fields: formatting None into the text would add the literal
    # token "None" and skew both similarity scores.
    combined_text = ' '.join(str(field) for field in fields if field)

    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([query, combined_text])
    tfidf_similarity = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]

    query_embedding = bert_embedding(query)
    text_embedding = bert_embedding(combined_text)
    bert_similarity = cosine_similarity(query_embedding, text_embedding)[0][0]

    return {
        'tfidf_similarity': tfidf_similarity,
        'bert_similarity': bert_similarity,
    }


def search(query, player_type, player_platform, type):
    """Run the full pipeline: official lookup, news lookup and annotation.

    Each news article is enriched in place with its truth value, influence
    flags, sentiment fields and relevance scores; the articles are also
    grouped into clusters.

    Args:
        query: Player name used for both lookups and for relevance scoring.
        player_type: Passed through to ``search_official``.
        player_platform: Passed through to ``search_official``.
        type: Official source selector, passed through to ``search_official``.

    Returns:
        A dict with ``'official_data'``, ``'unofficial_data'`` (the annotated
        articles) and ``'clusters'``.
    """
    official_data = search_official(query, player_type, player_platform, type)
    news_articles = search_unofficial(query)

    # Per-article analyses, each aligned by position with news_articles.
    truth_values = assess_truth(news_articles, official_data)
    influences = list(map(detect_influence, news_articles))
    clustered_articles = cluster_articles(news_articles)
    sentiments = list(map(sentiment_analysis, news_articles))
    relevance_scores = [relevance_score(entry, query) for entry in news_articles]

    def annotate(position, article):
        # Copy every analysis result for this position onto the article dict.
        article['truth_value'] = truth_values[position]
        (article['political_influence'],
         article['emotional_influence']) = influences[position]
        mood = sentiments[position]
        article['sentiment_polarity'] = mood['polarity']
        article['sentiment_polarity_label'] = mood['polarity_label']
        article['sentiment_subjectivity'] = mood['subjectivity']
        article['sentiment_subjectivity_label'] = mood['subjectivity_label']
        scores = relevance_scores[position]
        article['relevance_score_tfidf'] = scores['tfidf_similarity']
        article['relevance_score_bert'] = scores['bert_similarity']

    for position, article in enumerate(news_articles):
        annotate(position, article)

    return {
        'official_data': official_data,
        'unofficial_data': news_articles,
        'clusters': clustered_articles,
    }