In [12]:
!pip install transformers sentence-transformers
!pip install faiss-cpu
!pip install gradio
!pip install pandas

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 



In [15]:
from google.colab import files

# Upload the dataset into the Colab runtime; the call's return value is kept
# in `uploaded`.
# NOTE(review): the recorded output shows the file being saved as
# "football_data (1).csv", i.e. an older football_data.csv already existed in
# the runtime -- confirm which copy later cells actually read.
uploaded = files.upload()

Saving football_data.csv to football_data (1).csv


In [16]:
import io

import pandas as pd

DATA_FILE = 'football_data.csv'

# Load the data.
# A re-upload does not overwrite an existing file: the recorded upload output
# shows it being saved as "football_data (1).csv", so reading DATA_FILE from
# disk alone can silently load a stale copy. Prefer the bytes returned by this
# session's upload and only fall back to the on-disk file when they are not
# available.
# NOTE(review): assumes `uploaded` is keyed by the original file name -- if it
# is not, the fallback branch keeps the previous behaviour.
_fresh_upload = globals().get('uploaded') or {}
if DATA_FILE in _fresh_upload:
    df = pd.read_csv(io.BytesIO(_fresh_upload[DATA_FILE]))
else:
    df = pd.read_csv(DATA_FILE)
df.head()


Unnamed: 0,date,home_team,score,away_team
0,17-08-2024,Arsenal,2 - 0,Wolves
1,31-08-2024,Arsenal,1 - 1,Brighton
2,28-09-2024,Arsenal,4 - 2,Leicester
3,05-10-2024,Arsenal,3 - 1,Southampton
4,27-10-2024,Arsenal,2 - 2,Liverpool


In [17]:
# Data Preprocessing
# Remove the spaces from scores such as "2 - 0" so they read "2-0".
df['score'] = df['score'].str.replace(' ', '')
# Split on the dash and store both halves as integer goal counts in one step.
df[['home_goals', 'away_goals']] = (
    df['score'].str.split('-', expand=True).astype(int)
)

def get_result(row):
    """Classify a match from the home side's point of view.

    Args:
        row: Mapping-like object exposing 'home_goals' and 'away_goals'.

    Returns:
        'Home Win', 'Away Win' or 'Draw'.
    """
    home, away = row['home_goals'], row['away_goals']
    if home > away:
        return 'Home Win'
    if home < away:
        return 'Away Win'
    return 'Draw'

def _describe_match(row):
    """Render one match row as the sentence that gets embedded and retrieved."""
    return (f"On {row['date']}, {row['home_team']} played against {row['away_team']} with a score of "
            f"{row['score']}, resulting in a {row['result']}.")


# Label each match first: the sentence built below includes the result.
df['result'] = df.apply(get_result, axis=1)
df['text'] = df.apply(_describe_match, axis=1)

In [18]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; later cells call its encode() on both the match
# sentences and the incoming questions.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')


In [19]:
# Regenerate Embeddings
import faiss
import numpy as np

# Encode every match sentence, then load the vectors into a flat L2 index
# whose dimensionality is taken from the embedding matrix itself.
embeddings = embedding_model.encode(list(df['text']))
embedding_dim = embeddings.shape[-1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(embeddings))

In [20]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# One checkpoint name feeds both loaders so tokenizer and model always match.
model_name = 'google/flan-t5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sequence-to-sequence model that generate_answer() calls to produce answers.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [21]:
def retrieve_context(question, k=10):
    """Return the match sentences most similar to ``question`` as one string.

    The question is embedded with ``embedding_model`` and looked up in the
    FAISS ``index``; hits are mapped back to ``df['text']`` by position. If
    the question mentions "draw", only sentences that also mention "draw" are
    kept.

    Args:
        question: Free-text question.
        k: Maximum number of sentences to retrieve. Clamped to the number of
            vectors held by the index.

    Returns:
        The retrieved sentences joined by single spaces, or an empty string
        when nothing could be retrieved.
    """
    # Asking FAISS for more neighbours than it holds pads the result with -1,
    # and ``iloc[-1]`` would then silently return the last match (repeatedly)
    # instead of failing. Clamp k, and bail out if there is nothing to search.
    k = min(k, index.ntotal)
    if k <= 0:
        return ''

    # Encode the question using the embedding model
    question_embedding = embedding_model.encode([question])
    _distances, indices = index.search(np.array(question_embedding), k)
    # Belt and braces: skip any padding ids that still come back.
    relevant_texts = [df['text'].iloc[idx] for idx in indices[0] if idx >= 0]
    if 'draw' in question.lower():
        relevant_texts = [text for text in relevant_texts if 'draw' in text.lower()]
    return ' '.join(relevant_texts)

In [24]:
def generate_answer(question):
    """Answer a football question from retrieved match sentences.

    Retrieves context with ``retrieve_context``, wraps it and the question in
    a fixed prompt, and decodes the output of ``model.generate``.

    Args:
        question: Free-text question, e.g. as submitted from the Gradio form.

    Returns:
        The generated answer with surrounding whitespace removed, or a fixed
        message when the question is blank or no context was retrieved.
    """
    # A blank submission would otherwise retrieve arbitrary matches and make
    # the model answer a question that was never asked.
    if not question or not question.strip():
        return "Please enter a question about a football match."

    context = retrieve_context(question)
    if not context.strip():
        return "I'm sorry, I couldn't find any information related to your question."

    prompt = f"""You are a helpful assistant knowledgeable about football matches.

Context:
{context}

Please answer the following question based on the context provided:
{question}

Answer:"""
    # NOTE(review): the question sits after the context, so a context long
    # enough to push the prompt past 512 tokens would have the question
    # truncated away -- confirm the retrieval size keeps prompts short.
    inputs = tokenizer.encode(prompt, return_tensors='pt', max_length=512, truncation=True)
    # early_stopping is a beam-search option; with the default single beam it
    # has no effect and only triggers a warning, so it is not passed.
    outputs = model.generate(
        inputs,
        max_length=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer.strip()

In [25]:
import gradio as gr

# Sample questions offered as one-click examples in the UI.
example_questions = [
    "on 17-08-2024 what was the result between Arsenal and Wolves?",
    "on 17-08-2024 which team had won between Arsenal and Wolves?",
]
question_box = gr.Textbox(lines=2, placeholder='Ask me anything about past football matches...')
answer_box = gr.Textbox()

# Expose generate_answer() as a text-in / text-out web form.
iface = gr.Interface(
    title='Football Match Q&A System using flan-t5',
    description='Ask any question about past football matches from the dataset.',
    fn=generate_answer,
    inputs=question_box,
    outputs=answer_box,
    examples=example_questions,
)
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://49b1ad579dcf89fc55.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [14]:
import gradio as gr
def show_team_matches(team_name):
    """Return every match in ``df`` that involves ``team_name``.

    The name comes from a free-text box, so it is compared ignoring case and
    surrounding whitespace; an exact name still matches as before.

    Args:
        team_name: Team to look up. ``None`` or a blank string matches nothing.

    Returns:
        DataFrame with columns date, home_team, score, away_team and a fresh
        0-based index; empty when no match involves the team.
    """
    wanted = (team_name or '').strip().casefold()
    # Normalise the stored names the same way as the query before comparing.
    home = df['home_team'].str.strip().str.casefold()
    away = df['away_team'].str.strip().str.casefold()
    matches = df[(home == wanted) | (away == wanted)]
    return matches[['date', 'home_team', 'score', 'away_team']].reset_index(drop=True)

# Free-text team name in, table of that team's matches out.
team_name_box = gr.Textbox(placeholder='Enter a team name...')

team_iface = gr.Interface(
    title='Team Match Viewer',
    description='View all matches involving a specific team.',
    fn=show_team_matches,
    inputs=team_name_box,
    outputs='dataframe',
)

team_iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://897589f93ae8664d08.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


