In [3]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import re

# Load the dataset
data = pd.read_csv('my dataset 6.csv')

# Rows without a genre or a title can be neither labelled nor vectorized
# (the string operations below and TfidfVectorizer both fail on NaN),
# so drop them before any further processing.
data = data.dropna(subset=['genre', 'song name']).reset_index(drop=True)

# Normalize the genre labels: lowercase, strip punctuation, and collapse
# repeated words. The unique words are sorted so that the same set of genre
# words always yields the same label string; joining a bare ``set`` would
# emit the words in arbitrary order and could split one genre into several
# distinct class labels.
data['genre'] = (
    data['genre']
    .astype(str)
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
    .apply(lambda text: ' '.join(sorted(set(text.split()))))
)

# Extract the genre and title of each song (position-aligned arrays)
genres = data['genre'].values
titles = data['song name'].values

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    titles, genres, test_size=0.2, random_state=42
)

# TF-IDF matrix over *all* titles, used for title-to-title cosine similarity.
# It gets its own vectorizer so that fitting the classifier's vectorizer
# below does not silently change the vocabulary this matrix was built with.
similarity_vectorizer = TfidfVectorizer()
tfidf_matrix = similarity_vectorizer.fit_transform(titles)

# TF-IDF features for the genre classifier: fit on the training titles only,
# then apply the same vocabulary/IDF weights to the test titles.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Train the SVM model that predicts a song's genre from its title
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train_tfidf, y_train)

# Define a function to recommend similar songs based on the genre of the input song
def recommend_songs(input_song, n=5):
    """Recommend up to ``n`` songs that share the input song's genre.

    Candidates are the dataset rows whose genre label equals that of the
    first row titled ``input_song``. They are ranked by the cosine similarity
    between their TF-IDF title vector and the input song's title vector,
    highest first; ties keep their dataset order. Rows whose title equals
    ``input_song`` are never recommended.

    Reads the module-level ``titles``, ``genres`` and ``tfidf_matrix``.

    Args:
        input_song: Song title, matched exactly against ``titles``.
        n: Maximum number of recommendations. Zero or a negative value
            yields an empty list; ``None`` returns every candidate.

    Returns:
        A list of at most ``n`` song titles, best match first.

    Raises:
        IndexError: If ``input_song`` does not occur in ``titles``.
    """
    # Locate the first row with this title. IndexError is raised (rather
    # than letting ValueError escape) because that is the exception type
    # this function has always produced for an unknown song.
    try:
        input_index = titles.tolist().index(input_song)
    except ValueError:
        raise IndexError(
            f"Song {input_song!r} was not found in the dataset"
        ) from None

    # Without this guard a limit of 0 (or less) would never be reached and
    # every same-genre song would be returned.
    if n is not None and n <= 0:
        return []

    input_genre = genres[input_index]

    # One row of similarities: the input title against every title.
    scores = cosine_similarity(tfidf_matrix[input_index], tfidf_matrix)[0]

    # Keep only same-genre rows that are not the input song itself.
    candidates = [
        i
        for i, (title, genre) in enumerate(zip(titles, genres))
        if genre == input_genre and title != input_song
    ]

    # Stable sort: equally similar songs stay in dataset order.
    candidates.sort(key=lambda i: scores[i], reverse=True)

    return [titles[i] for i in candidates[:n]]

# Example usage
# Surrounding whitespace is stripped because titles are matched exactly.
input_song = input("Enter a song:-").strip()
try:
    recommended_songs = recommend_songs(input_song, n=5)
except IndexError:
    # recommend_songs raises IndexError when the title is not in the dataset;
    # report it and carry on so the model evaluation below still runs.
    recommended_songs = []
    print(f"Song '{input_song}' was not found in the dataset.")
else:
    print(f"Songs similar to '{input_song}': {recommended_songs}")

# Predict the genres of the test set
y_pred = svm_model.predict(X_test_tfidf)

# Calculate the accuracy and precision of the model.
# Genres that are never predicted have an undefined precision; with
# zero_division=0 they count as 0 in the weighted average (the same value
# the default uses) without emitting an UndefinedMetricWarning.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(
    y_test, y_pred, average='weighted', zero_division=0
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")



Enter a song:-Jao Jao Ure
Songs similar to 'Jao Jao Ure': ['Jodi Chuey Jao', 'Hariye Jao Jodi Bhire', 'Ure Jak', 'Hawai jahaje ure', 'Sabar Hridaye Rabindranath']
Accuracy: 0.43
Precision: 0.39


  _warn_prf(average, modifier, msg_start, len(result))
