In [1]:
import pandas as pd

In [2]:
import pandas as pd

chunk_size = 1000  # Adjust based on your system's memory
file_path = "/content/spotify_millsongdata.csv"

chunks = []  # List to hold chunks

try:
    for chunk in pd.read_csv(file_path, chunksize=chunk_size, engine='python'):
        chunks.append(chunk)  # Append each chunk to the list
except Exception as e:
    print(f"Error: {e}")

# Concatenate all chunks to form the complete DataFrame
df = pd.concat(chunks, ignore_index=True)

# Find the overall shape of the DataFrame
print(f"Overall shape of the DataFrame: {df.shape}")


Error: unexpected end of data
Overall shape of the DataFrame: (2000, 4)


In [3]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.shape

(2000, 4)

In [5]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [6]:
df.drop(columns='link',inplace=True)

#df=df.drop('link',axis=1).reset_index(drop=True)

In [7]:
df['text'][0]

"Look at her face, it's a wonderful face  \r\nAnd it means something special to me  \r\nLook at the way that she smiles when she sees me  \r\nHow lucky can one fellow be?  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?  \r\n  \r\nAnd when we go for a walk in the park  \r\nAnd she holds me and squeezes my hand  \r\nWe'll go on walking for hours and talking  \r\nAbout all the things that we plan  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?\r\n\r\n"

## Text Preprocessing

In [8]:
df['text']=df['text'].str.lower().replace(r'^\w\s',' ').replace(r'\n',' ',regex=True)  # \w\s ----->(a-zA-Z0-9)

In [9]:
df['text'][0]

"look at her face, it's a wonderful face  \r and it means something special to me  \r look at the way that she smiles when she sees me  \r how lucky can one fellow be?  \r   \r she's just my kind of girl, she makes me feel fine  \r who could ever believe that she could be mine?  \r she's just my kind of girl, without her i'm blue  \r and if she ever leaves me what could i do, what could i do?  \r   \r and when we go for a walk in the park  \r and she holds me and squeezes my hand  \r we'll go on walking for hours and talking  \r about all the things that we plan  \r   \r she's just my kind of girl, she makes me feel fine  \r who could ever believe that she could be mine?  \r she's just my kind of girl, without her i'm blue  \r and if she ever leaves me what could i do, what could i do?\r \r "

In [10]:
import nltk
from nltk.stem.porter import PorterStemmer

In [11]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [12]:
stemmer=PorterStemmer()

In [13]:
def token(txt):
  token=nltk.word_tokenize(txt)
  a=[stemmer.stem(w) for w in token]
  return " ".join(a)

In [14]:
token('Beautiful,You are beautiful')


'beauti , you are beauti'

In [15]:
df['text'].apply(lambda x:token(x))

0       look at her face , it 's a wonder face and it ...
1       take it easi with me , pleas touch me gentli l...
2       i 'll never know whi i had to go whi i had to ...
3       make somebodi happi is a question of give and ...
4       make somebodi happi is a question of give and ...
                              ...                        
1995    are we readi are we readi readi are we readi a...
1996    do i imagin it , or do i see your stare is the...
1997    i 'm miss american dream sinc i wa seventeen d...
1998    vers 1 : hey princ charm you can kiss my ass i...
1999    everybodi readi , caus we gon na get up in thi...
Name: text, Length: 2000, dtype: object

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
tfidf=TfidfVectorizer(analyzer='word',stop_words='english')

In [18]:
matrix=tfidf.fit_transform(df['text'])

In [20]:
similar=cosine_similarity(matrix)

In [21]:
similar[0]

array([1.        , 0.00186418, 0.00927137, ..., 0.00672121, 0.00861685,
       0.        ])

In [23]:
df[df['song']=="Ahe's My Kind Of Girl"]

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"look at her face, it's a wonderful face \r an..."


In [26]:
df[df['song']=="Ahe's My Kind Of Girl"].index[0]  # to see the index number

0

# Recommendation Function

In [29]:
def recommender(song_name):
  idx=df[df['song']==song_name].index[0]
  distance=sorted(list(enumerate(similar[idx])),reverse=True,key=lambda x:x[1])
  song=[]
  for s_id in distance[1:7]:
    song.append(df.iloc[s_id[0]].song)
  return song

In [30]:
recommender("Ahe's My Kind Of Girl")

['I Am Just A Girl',
 "That's Me",
 'If I Could Just See You Now',
 'Another Girl',
 "Nobody's Girl",
 'Be Kind To Me']

In [31]:
import pickle

In [32]:
pickle.dump(similar,open("similarity","wb"))

In [33]:
pickle.dump(df,open("similarity","wb"))