In [1]:
!git clone https://github.com/AshishJangra27/datasets

Cloning into 'datasets'...
remote: Enumerating objects: 328, done.[K
remote: Counting objects: 100% (93/93), done.[K
remote: Compressing objects: 100% (83/83), done.[K
remote: Total 328 (delta 19), reused 54 (delta 9), pack-reused 235 (from 1)[K
Receiving objects: 100% (328/328), 278.62 MiB | 13.03 MiB/s, done.
Resolving deltas: 100% (145/145), done.
Updating files: 100% (225/225), done.


### 1. Overall Data Exploration

#### 1.1) Importing Libraries

In [2]:
import numpy as np
import pandas as pd

#### 1.2) Loading the Dataset

In [3]:
df = pd.read_csv('datasets/Gaana/songs.csv')

df.head()

Unnamed: 0,name,singer,singer_id,duration,link,language
0,Dil - E - Nadan Tujhe,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,05:00,/dil-e-nadan-tujhe-3,Urdu
1,Agar Hum Kahen Aur Woh Muskara De,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,06:26,/agar-hum-kahen-aur-woh-muskura-den,Urdu
2,Unke Dekhe Se,Jagjit Singh,/artist/jagjeet-singh-1,03:41,/unke-dekhe-se,Urdu
3,Yeh Na Thi Hamari Qismat - Chitra Singh,Chitra Singh,/artist/chitra-singh,04:26,/yeh-na-thi-hamari-qismat-2,Urdu
4,Hazaron Khwahishen Aisi,Jagjit Singh,/artist/jagjeet-singh-1,05:39,/hazaron-khwahishen-aisi-1,Urdu


#### 1.3) Data Description

In [4]:
df.describe()

Unnamed: 0,name,singer,singer_id,duration,link,language
count,41355,41355,41355,41355,41355,41355
unique,31534,6196,6580,1309,36361,16
top,O Mere Dil Ke Chain,Lata Mangeshkar,/artist/lata-mangeshkar,03:02,/kya-khoob-lagti-ho,Telugu
freq,26,1410,1410,447,2,4996


#### 1.4) Checking Null Values

In [5]:
df.isnull().sum()

Unnamed: 0,0
name,0
singer,0
singer_id,0
duration,0
link,0
language,0


##### 1.5) Checking Duplicates

In [6]:
df['link'].duplicated().sum()

4994

##### 1.6) Removing Duplicates

In [8]:
df.drop_duplicates(subset='link', inplace=True)

### 2. Data Cleaning

#### 2.1) Cleaning Duration

In [20]:
duration = []

for i in df['duration']:

  if len(i.split(':')) == 2:
      mints = int(i.split(":")[0])
      secds = int(i.split(":")[1])
      duration.append(mints*60 + secds)

  else:
      hours = int(i.split(":")[0])
      mints = int(i.split(":")[1])
      secds = int(i.split(":")[2])
      duration.append(hours * 3600 + mints*60 + secds)

df['duration_in_seconds'] = duration

df.head()

Unnamed: 0,name,singer,singer_id,duration,link,language,duration_in_seconds
0,Dil - E - Nadan Tujhe,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,05:00,/dil-e-nadan-tujhe-3,Urdu,300
1,Agar Hum Kahen Aur Woh Muskara De,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,06:26,/agar-hum-kahen-aur-woh-muskura-den,Urdu,386
2,Unke Dekhe Se,Jagjit Singh,/artist/jagjeet-singh-1,03:41,/unke-dekhe-se,Urdu,221
3,Yeh Na Thi Hamari Qismat - Chitra Singh,Chitra Singh,/artist/chitra-singh,04:26,/yeh-na-thi-hamari-qismat-2,Urdu,266
4,Hazaron Khwahishen Aisi,Jagjit Singh,/artist/jagjeet-singh-1,05:39,/hazaron-khwahishen-aisi-1,Urdu,339


### 3. Data Anlaysis

#### 3.1) Total Songs

In [28]:
print(df['link'].nunique(), len(df))

36361 36361


#### 3.2) Total Singers

In [51]:
df['singer_id'].str.split('|').explode().nunique()

4528

### 3.3) Total Languages

In [52]:
df['language'].nunique()

15

#### 3.4) Accumulative Duration of Songs Listed on Gaana

In [79]:
total_seconds = df['duration_in_seconds'].sum()

hours = total_seconds//3600

reamaining_seconds = total_seconds - hours*3600
minutes = reamaining_seconds//60
seconds = reamaining_seconds%60

print(hours, 'hours',minutes,'minutes',seconds,'seconds')

3160 hours 21 minutes 7 seconds


#### 3.5) Top 10 Songs

In [91]:
df.groupby('name')['duration'].count().sort_values(ascending = False).head(10)

Unnamed: 0_level_0,duration
name,Unnamed: 1_level_1
Padhyams,20
V,14
Ninna Nanna,14
O Mere Dil Ke Chain,13
Ahe Ramahari,12
Yedhalo Tholivalape,12
Naguva Nayana,12
Tuma Gaan Mahu Machhi,12
Preethine Aa Dyavaru Thanda,11
Muttina Hanigalu,11


#### 3.6) Top 5 Languages

In [95]:
df['language'].value_counts().sort_values(ascending = False).head()

Unnamed: 0_level_0,count
language,Unnamed: 1_level_1
Telugu,4995
Hindi,4993
Marathi,4699
Tamil,4677
Punjabi,3818


#### 3.7) Top 10 Singers with most numbers of Solo

In [102]:
df['singer_id'].value_counts().head(12)

Unnamed: 0_level_0,count
singer_id,Unnamed: 1_level_1
/artist/s-p-balasubrahamanyam,1155
/artist/asha-bhosle-1,1037
/artist/p-suseela-2,1019
/artist/lata-mangeshkar,856
/artist/s-janaki,728
/artist/mohammad-rafi,698
/artist/s-p-balasubrahamanyam|/artist/p-suseela-2,682
/artist/kishore-kumar,663
/artist/noor-jehan,489
/artist/s-p-balasubrahamanyam|/artist/s-janaki,455


#### 3.8) Top 10 Singers with most numbers of Songs

In [63]:
df['duration_in_seconds'].sum()//3600

3160

In [65]:
(df['duration_in_seconds'].sum()%3600)/60

21.116666666666667

In [66]:
(df['duration_in_seconds'].sum()%3600)%60

7

In [None]:
'3160 hours 13 mints 10 seconds'

In [67]:
3160 * 3600 + 21*60 + 7

11377267

In [43]:
df['singer_id'].str.split('|').explode().nunique()

4528

In [23]:
df

Unnamed: 0,name,singer,singer_id,duration,link,language,duration_in_seconds
0,Dil - E - Nadan Tujhe,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,05:00,/dil-e-nadan-tujhe-3,Urdu,300
1,Agar Hum Kahen Aur Woh Muskara De,Chitra Singh|Jagjit Singh,/artist/chitra-singh|/artist/jagjeet-singh-1,06:26,/agar-hum-kahen-aur-woh-muskura-den,Urdu,386
2,Unke Dekhe Se,Jagjit Singh,/artist/jagjeet-singh-1,03:41,/unke-dekhe-se,Urdu,221
3,Yeh Na Thi Hamari Qismat - Chitra Singh,Chitra Singh,/artist/chitra-singh,04:26,/yeh-na-thi-hamari-qismat-2,Urdu,266
4,Hazaron Khwahishen Aisi,Jagjit Singh,/artist/jagjeet-singh-1,05:39,/hazaron-khwahishen-aisi-1,Urdu,339
...,...,...,...,...,...,...,...
41350,Chaaridike Paaper Andhar (Manush Janam Diye Bi...,Kishore Kumar,/artist/kishore-kumar,03:23,/chaaridike-paaper-andhar-manush-janam-diye-bidhi,Bengali,203
41351,Ei Ki Go Shesh Daan,Feroza Begum,/artist/feroza-begum,03:56,/ei-kigo-sesh-daan,Bengali,236
41352,Ei Je Bangla,Sandhya Mukherjee|Hemant Kumar,/artist/sandhya-mukherjee|/artist/hemanta-mukh...,07:36,/ei-je-bangla,Bengali,456
41353,Jatone Hridaye Rekho,Ajoy Chakrabarty,/artist/ajoy-chakrabarty,04:29,/jatone-hridaye-rekho,Bengali,269


In [None]:
'Jagjit Singh, jagjit-singh-1'
'Jagjit Singh, jagjit-singh-2'
'Jagjit Singh, jagjit-singh-3'
'Jagjit Singh, jagjit-singh-4'

In [None]:
-- Overall Analysis


--  Yearly Anlaysis
# 7. Top Singers of the year | Plays

--  Monthly Anlaysis
# 7. Top Songs of the year | Plays
# 7. Top Singers of the year | Plays

--  Weekly Anlaysis
# 7. Top Songs of the year | Plays
# 7. Top Singers of the year | Plays