<a href="https://colab.research.google.com/github/Srini-c28/GEN-AI---lab-work/blob/main/2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import io
import base64
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def unsupervised_loan_approval(file_obj, n_clusters, dbscan_eps, dbscan_min_samples):
    """
    Performs unsupervised learning on loan approval data using KMeans and DBSCAN.
    """
    try:
        data = pd.read_csv(io.StringIO(file_obj.read().decode('utf-8')))
    except Exception as e:
        return f"Error reading file: {e}", None, None, None, None

    data = data.fillna(data.mean())
    numerical_features = data.select_dtypes(include=np.number)
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    kmeans_silhouette = silhouette_score(scaled_data, kmeans_labels)
    kmeans_calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    dbscan = DBSCAN(eps=dbscan_eps, min_samples=dbscan_min_samples)
    dbscan_labels = dbscan.fit_predict(scaled_data)

    unique_labels = np.unique(dbscan_labels)
    if len(unique_labels) > 1 and -1 in unique_labels and len(unique_labels) > 2:
        dbscan_silhouette = silhouette_score(scaled_data, dbscan_labels)
        dbscan_calinski = calinski_harabasz_score(scaled_data, dbscan_labels)
    else:
        dbscan_silhouette = "N/A"
        dbscan_calinski = "N/A"

    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'KMeans Clustering (Silhouette: {kmeans_silhouette:.2f}, Calinski: {kmeans_calinski:.2f})')

    plt.subplot(1, 2, 2)
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=dbscan_labels, cmap='viridis')
    plt.title(f'DBSCAN Clustering (Silhouette: {dbscan_silhouette}, Calinski: {dbscan_calinski})')

    plt.tight_layout()

    # Convert plot to base64 for Gradio image display.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close() #close the plot so it doesn't show up outside of gradio.

    return f"KMeans Silhouette Score: {kmeans_silhouette}\nKMeans Calinski-Harabasz Score: {kmeans_calinski}\nDBSCAN Silhouette Score: {dbscan_silhouette}\nDBSCAN Calinski-Harabasz Score: {dbscan_calinski}", f"data:image/png;base64,{image_base64}", kmeans_silhouette, kmeans_calinski, dbscan_silhouette, dbscan_calinski
    #return the metrics and the image.

if __name__ == "__main__":
    iface = gr.Interface(
        fn=unsupervised_loan_approval,
        inputs=[
            gr.File(type="file", label="Upload Loan Data (CSV)"),
            gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of KMeans Clusters"),
            gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.5, label="DBSCAN Epsilon"),
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="DBSCAN Min Samples"),
        ],
        outputs=[
            gr.Textbox(label="Clustering Metrics"),
            gr.Image(label="Clustering Visualization"),
            gr.Number(label="KMeans Silhouette"),
            gr.Number(label="KMeans Calinski"),
            gr.Textbox(label="DBSCAN Silhouette"),
            gr.Textbox(label="DBSCAN Calinski")
        ],
        title="Unsupervised Loan Approval Clustering",
        description="Upload a CSV file with loan data and adjust clustering parameters.",
    )
    iface.launch()

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

ValueError: Invalid value for parameter `type`: file. Please choose from one of: ['filepath', 'binary']

In [None]:
!pip install gradio
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import io
import base64
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def movie_recommendations_kmeans(file_obj, n_clusters, target_movie):
    """
    Performs KMeans clustering on movie data and provides movie recommendations.

    Args:
        file_obj (file): Uploaded CSV file containing movie data.
        n_clusters (int): Number of clusters for KMeans.
        target_movie (str): Name of the movie for which to generate recommendations.

    Returns:
        tuple: A string containing recommendations, and a base64 encoded image of the plot, silhouette, and calinski scores.
    """
    try:
        data = pd.read_csv(io.StringIO(file_obj.read().decode('utf-8')))
    except Exception as e:
        return f"Error reading file: {e}", None, None, None

    # Assuming movie data has numerical features for clustering. Adjust as needed.
    numerical_features = data.select_dtypes(include=np.number).fillna(0)  # Handle NaNs.

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    data['cluster'] = kmeans_labels

    try:
        target_movie_cluster = data[data['title'] == target_movie]['cluster'].values[0]  # Assuming a 'title' column.
    except IndexError:
        return f"Movie '{target_movie}' not found in the dataset.", None, None, None

    recommended_movies = data[data['cluster'] == target_movie_cluster]['title'].tolist()
    recommended_movies = [movie for movie in recommended_movies if movie != target_movie]  # Remove target movie.
    if not recommended_movies:
        recommendations_text = "No other movies found in the same cluster."
    else:
        recommendations_text = "Recommended Movies:\n" + "\n".join(recommended_movies)

    # Visualization (PCA for 2D plot)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    plt.figure(figsize=(10, 6))
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'Movie Clusters (Target: {target_movie})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close()

    silhouette = silhouette_score(scaled_data, kmeans_labels)
    calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    return recommendations_text, f"data:image/png;base64,{image_base64}", silhouette, calinski


if __name__ == "__main__":
    iface = gr.Interface(
        fn=movie_recommendations_kmeans,
        inputs=[
            gr.File(type="file", label="Upload Movie Data (CSV)"),
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
            gr.Textbox(label="Target Movie Title"),
        ],
        outputs=[
            gr.Textbox(label="Movie Recommendations"),
            gr.Image(label="Movie Cluster Visualization"),
            gr.Number(label="Silhouette Score"),
            gr.Number(label="Calinski-Harabasz Score"),
        ],
        title="Movie Recommendation System (KMeans)",
        description="Upload a CSV file with movie data and get recommendations based on KMeans clustering.",
    )
    iface.launch()



ValueError: Invalid value for parameter `type`: file. Please choose from one of: ['filepath', 'binary']

In [None]:
iface = gr.Interface(
    fn=movie_recommendations_kmeans,
    inputs=[
        gr.File(type="filepath", label="Upload Movie Data (CSV)"), # Changed type to "filepath"
        gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
        gr.Textbox(label="Target Movie Title"),
    ],
    outputs=[
        gr.Textbox(label="Movie Recommendations"),
        gr.Image(label="Movie Cluster Visualization"),
        gr.Number(label="Silhouette Score"),
        gr.Number(label="Calinski-Harabasz Score"),
    ],
    title="Movie Recommendation System (KMeans)",
    description="Upload a CSV file with movie data and get recommendations based on KMeans clustering.",
)
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c787b57ebbcee20c48.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import io
import base64
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def tamil_movie_recommendations_kmeans(file_obj, n_clusters, target_movie):
    """
    Performs KMeans clustering on Tamil movie data and provides movie recommendations.

    Args:
        file_obj (file): Uploaded CSV file containing Tamil movie data.
        n_clusters (int): Number of clusters for KMeans.
        target_movie (str): Name of the Tamil movie for which to generate recommendations.

    Returns:
        tuple: A string containing recommendations, and a base64 encoded image of the plot, silhouette, and calinski scores.
    """
    try:
        data = pd.read_csv(io.StringIO(file_obj.read().decode('utf-8')))
    except Exception as e:
        return f"Error reading file: {e}", None, None, None

    # Assuming movie data has numerical features for clustering. Adjust as needed.
    numerical_features = data.select_dtypes(include=np.number).fillna(0)  # Handle NaNs.

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    data['cluster'] = kmeans_labels

    try:
        target_movie_cluster = data[data['title'] == target_movie]['cluster'].values[0]  # Assuming a 'title' column.
    except IndexError:
        return f"Movie '{target_movie}' not found in the dataset.", None, None, None

    recommended_movies = data[data['cluster'] == target_movie_cluster]['title'].tolist()
    recommended_movies = [movie for movie in recommended_movies if movie != target_movie]  # Remove target movie.
    if not recommended_movies:
        recommendations_text = "No other movies found in the same cluster."
    else:
        recommendations_text = "Recommended Movies:\n" + "\n".join(recommended_movies)

    # Visualization (PCA for 2D plot)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    plt.figure(figsize=(10, 6))
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'Tamil Movie Clusters (Target: {target_movie})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close()

    silhouette = silhouette_score(scaled_data, kmeans_labels)
    calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    return recommendations_text, f"data:image/png;base64,{image_base64}", silhouette, calinski


if __name__ == "__main__":
    iface = gr.Interface(
        fn=tamil_movie_recommendations_kmeans,
        inputs=[
            gr.File(type="file", label="Upload Tamil Movie Data (CSV)"),
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
            gr.Textbox(label="Target Tamil Movie Title"),
        ],
        outputs=[
            gr.Textbox(label="Movie Recommendations"),
            gr.Image(label="Movie Cluster Visualization"),
            gr.Number(label="Silhouette Score"),
            gr.Number(label="Calinski-Harabasz Score"),
        ],
        title="Tamil Movie Recommendation System (KMeans)",
        description="Upload a CSV file with Tamil movie data and get recommendations based on KMeans clustering.",
    )
    iface.launch()


ValueError: Invalid value for parameter `type`: file. Please choose from one of: ['filepath', 'binary']

In [None]:
iface = gr.Interface(
    fn=tamil_movie_recommendations_kmeans,
    inputs=[
        gr.File(type="filepath", label="Upload Tamil Movie Data (CSV)"), # Changed type to "filepath"
        gr.Slider(minimum=2, maximum=20, step=1, value=

SyntaxError: incomplete input (<ipython-input-5-05f1861baeaa>, line 5)

In [None]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import io
import base64
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def tamil_movie_recommendations_kmeans(file_obj, n_clusters, target_movie):
    """
    Performs KMeans clustering on Tamil movie data and provides movie recommendations.

    Args:
        file_obj (file): Uploaded CSV file containing Tamil movie data.
        n_clusters (int): Number of clusters for KMeans.
        target_movie (str): Name of the Tamil movie for which to generate recommendations.

    Returns:
        tuple: A string containing recommendations, and a base64 encoded image of the plot, silhouette, and calinski scores.
    """
    try:
        data = pd.read_csv(io.StringIO(file_obj.read().decode('utf-8')))
    except Exception as e:
        return f"Error reading file: {e}", None, None, None

    # Assuming movie data has numerical features for clustering. Adjust as needed.
    numerical_features = data.select_dtypes(include=np.number).fillna(0)  # Handle NaNs.

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    data['cluster'] = kmeans_labels

    try:
        target_movie_cluster = data[data['title'] == target_movie]['cluster'].values[0]  # Assuming a 'title' column.
    except IndexError:
        return f"Movie '{target_movie}' not found in the dataset.", None, None, None

    recommended_movies = data[data['cluster'] == target_movie_cluster]['title'].tolist()
    recommended_movies = [movie for movie in recommended_movies if movie != target_movie]  # Remove target movie.
    if not recommended_movies:
        recommendations_text = "No other movies found in the same cluster."
    else:
        recommendations_text = "Recommended Movies:\n" + "\n".join(recommended_movies)

    # Visualization (PCA for 2D plot)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    plt.figure(figsize=(10, 6))
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'Tamil Movie Clusters (Target: {target_movie})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close()

    silhouette = silhouette_score(scaled_data, kmeans_labels)
    calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    return recommendations_text, f"data:image/png;base64,{image_base64}", silhouette, calinski


if __name__ == "__main__":
    iface = gr.Interface(
        fn=tamil_movie_recommendations_kmeans,
        inputs=[
            gr.File(type="file", label="Upload Tamil Movie Data (CSV)"),
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
            gr.Textbox(label="Target Tamil Movie Title"),
        ],
        outputs=[
            gr.Textbox(label="Movie Recommendations"),
            gr.Image(label="Movie Cluster Visualization"),
            gr.Number(label="Silhouette Score"),
            gr.Number(label="Calinski-Harabasz Score"),
        ],
        title="Tamil Movie Recommendation System (KMeans)",
        description="Upload a CSV file with Tamil movie data and get recommendations based on KMeans clustering.",
    )
    iface.launch()

ValueError: Invalid value for parameter `type`: file. Please choose from one of: ['filepath', 'binary']

In [None]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import io
import base64
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def tamil_movie_recommendations_kmeans(data_file, n_clusters, target_movie):
    """
    Performs KMeans clustering on Tamil movie data and provides movie recommendations.

    Args:
        data_file (str): Path to the uploaded CSV file containing Tamil movie data.
        n_clusters (int): Number of clusters for KMeans.
        target_movie (str): Name of the Tamil movie for which to generate recommendations.

    Returns:
        tuple: A string containing recommendations, and a base64 encoded image of the plot, silhouette, and calinski scores.
    """
    try:
        data = pd.read_csv(data_file.name) # important change. access name property of file object.
    except Exception as e:
        return f"Error reading file: {e}", None, None, None

    # Assuming movie data has numerical features for clustering. Adjust as needed.
    numerical_features = data.select_dtypes(include=np.number).fillna(0)  # Handle NaNs.

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    data['cluster'] = kmeans_labels

    try:
        target_movie_cluster = data[data['title'] == target_movie]['cluster'].values[0]  # Assuming a 'title' column.
    except IndexError:
        return f"Movie '{target_movie}' not found in the dataset.", None, None, None

    recommended_movies = data[data['cluster'] == target_movie_cluster]['title'].tolist()
    recommended_movies = [movie for movie in recommended_movies if movie != target_movie]  # Remove target movie.
    if not recommended_movies:
        recommendations_text = "No other movies found in the same cluster."
    else:
        recommendations_text = "Recommended Movies:\n" + "\n".join(recommended_movies)

    # Visualization (PCA for 2D plot)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    plt.figure(figsize=(10, 6))
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'Tamil Movie Clusters (Target: {target_movie})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close()

    silhouette = silhouette_score(scaled_data, kmeans_labels)
    calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    return recommendations_text, f"data:image/png;base64,{image_base64}", silhouette, calinski


if __name__ == "__main__":
    iface = gr.Interface(
        fn=tamil_movie_recommendations_kmeans,
        inputs=[
            gr.File(type="file", label="Upload Tamil Movie Data (CSV)"), #important change.
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
            gr.Textbox(label="Target Tamil Movie Title"),
        ],
        outputs=[
            gr.Textbox(label="Movie Recommendations"),
            gr.Image(label="Movie Cluster Visualization"),
            gr.Number(label="Silhouette Score"),
            gr.Number(label="Calinski-Harabasz Score"),
        ],
        title="Tamil Movie Recommendation System (KMeans)",
        description="Upload a CSV file with Tamil movie data and get recommendations based on KMeans clustering.",
    )
    iface.launch()


ValueError: Invalid value for parameter `type`: file. Please choose from one of: ['filepath', 'binary']

In [None]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import io
import base64
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def tamil_movie_recommendations_kmeans(data_file, n_clusters, target_movie):
    """
    Performs KMeans clustering on Tamil movie data and provides movie recommendations.

    Args:
        data_file (str): Path to the uploaded CSV file containing Tamil movie data.
        n_clusters (int): Number of clusters for KMeans.
        target_movie (str): Name of the Tamil movie for which to generate recommendations.

    Returns:
        tuple: A string containing recommendations, and a base64 encoded image of the plot, silhouette, and calinski scores.
    """
    try:
        data = pd.read_csv(data_file.name)
    except Exception as e:
        return f"Error reading file: {e}", None, None, None

    # Assuming movie data has numerical features for clustering. Adjust as needed.
    numerical_features = data.select_dtypes(include=np.number).fillna(0)  # Handle NaNs.

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numerical_features)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_data)
    data['cluster'] = kmeans_labels

    try:
        target_movie_cluster = data[data['title'] == target_movie]['cluster'].values[0]  # Assuming a 'title' column.
    except IndexError:
        return f"Movie '{target_movie}' not found in the dataset.", None, None, None

    recommended_movies = data[data['cluster'] == target_movie_cluster]['title'].tolist()
    recommended_movies = [movie for movie in recommended_movies if movie != target_movie]  # Remove target movie.
    if not recommended_movies:
        recommendations_text = "No other movies found in the same cluster."
    else:
        recommendations_text = "Recommended Movies:\n" + "\n".join(recommended_movies)

    # Visualization (PCA for 2D plot)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    plt.figure(figsize=(10, 6))
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c=kmeans_labels, cmap='viridis')
    plt.title(f'Tamil Movie Clusters (Target: {target_movie})')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    plt.close()

    silhouette = silhouette_score(scaled_data, kmeans_labels)
    calinski = calinski_harabasz_score(scaled_data, kmeans_labels)

    return recommendations_text, f"data:image/png;base64,{image_base64}", silhouette, calinski


if __name__ == "__main__":
    iface = gr.Interface(
        fn=tamil_movie_recommendations_kmeans,
        inputs=[
            gr.File(type="filepath", label="Upload Tamil Movie Data (CSV)"), #Corrected line.
            gr.Slider(minimum=2, maximum=20, step=1, value=5, label="Number of Clusters"),
            gr.Textbox(label="Target Tamil Movie Title"),
        ],
        outputs=[
            gr.Textbox(label="Movie Recommendations"),
            gr.Image(label="Movie Cluster Visualization"),
            gr.Number(label="Silhouette Score"),
            gr.Number(label="Calinski-Harabasz Score"),
        ],
        title="Tamil Movie Recommendation System (KMeans)",
        description="Upload a CSV file with Tamil movie data and get recommendations based on KMeans clustering.",
    )
    iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://527ee1b5b45674f4a4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
