# Import Dataset

In [129]:
import pandas as pd

In [130]:
df = pd.read_csv('googleplaystore.csv')

# 1. Top 5 rows

In [131]:
df.head(5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


# 2. Bottom 5 rows

In [132]:
df.tail(5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device
10840,iHoroscope - 2018 Daily Horoscope & Astrology,LIFESTYLE,4.5,398307,19M,"10,000,000+",Free,0,Everyone,Lifestyle,"July 25, 2018",Varies with device,Varies with device


# 3. Shape of Dataset

In [133]:
print(f'Rows: {df.shape[0]}, Columns: {df.shape[1]}')

Rows: 10841, Columns: 13


# 4. Information about Dataset

In [134]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


# 5. Statistics about dataframe

In [135]:
df.describe()

Unnamed: 0,Rating
count,9367.0
mean,4.193338
std,0.537431
min,1.0
25%,4.0
50%,4.3
75%,4.5
max,19.0


In [136]:
df.describe(include='all')

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
count,10841,10841,9367.0,10841.0,10841,10841,10840,10841.0,10840,10841,10841,10833,10838
unique,9660,34,,6002.0,462,22,3,93.0,6,120,1378,2832,33
top,ROBLOX,FAMILY,,0.0,Varies with device,"1,000,000+",Free,0.0,Everyone,Tools,"August 3, 2018",Varies with device,4.1 and up
freq,9,1972,,596.0,1695,1579,10039,10040.0,8714,842,326,1459,2451
mean,,,4.193338,,,,,,,,,,
std,,,0.537431,,,,,,,,,,
min,,,1.0,,,,,,,,,,
25%,,,4.0,,,,,,,,,,
50%,,,4.3,,,,,,,,,,
75%,,,4.5,,,,,,,,,,


# 6. Total number of app titles contain astrology

In [137]:
sum(df['App'].str.contains('astrology', case=False))

3

# 7. Find average app rating

In [138]:
df['Rating'].mean()

4.193338315362443

# 8. Find total number of unique category

In [139]:
df['Category'].nunique()

34

# 9. Which Category gets the highest average rating?

In [140]:
df.groupby('Category')['Rating'].mean().sort_values(ascending=False).head(1)

Category
1.9    19.0
Name: Rating, dtype: float64

# 10. Find total number of apps having 5 star rating

In [141]:
sum(df['Rating']==5.0)

274

# 11. Find average value of reviews

In [142]:
df['Reviews'].dtype

dtype('O')

In [143]:
df['Reviews'] = df['Reviews'].replace('3.0M', 3.0)

In [144]:
df['Reviews'] = df['Reviews'].astype(float)

In [145]:
df['Reviews'].mean()

444111.9265750392

# 12. Find total number of Free and Paid Apps

In [146]:
df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

In [147]:
df['Type'].value_counts()

Type
Free    10039
Paid      800
0           1
Name: count, dtype: int64

# 13. Which app has maximum reviews?

In [148]:
df[df['Reviews']==df['Reviews'].max()]['App']

2544    Facebook
Name: App, dtype: object

# 14. Display Top 5 Apps having highest reviews

In [149]:
df.groupby('App')['Reviews'].sum().sort_values(ascending=False).head(5)

App
Instagram                                   266241989.0
WhatsApp Messenger                          207348304.0
Clash of Clans                              179558781.0
Messenger – Text and Video Chat for Free    169932272.0
Subway Surfers                              166331958.0
Name: Reviews, dtype: float64

In [150]:
index = df['Reviews'].sort_values(ascending=False).head().index
df.iloc[index]['App']

2544              Facebook
3943              Facebook
381     WhatsApp Messenger
336     WhatsApp Messenger
3904    WhatsApp Messenger
Name: App, dtype: object

# 15. Find average rating of free and paid apps

In [151]:
df.groupby('Type')['Rating'].mean()

Type
0       19.000000
Free     4.186203
Paid     4.266615
Name: Rating, dtype: float64

# 16. Display top 5 apps having maximum installs

In [152]:
df['Installs'].dtype

dtype('O')

In [153]:
df['Installs'] = df['Installs'].str.replace(',', '')

In [154]:
df['Installs'] = df['Installs'].str.replace('+', '')

In [155]:
df['Installs'] = df['Installs'].replace('Free', 0)

In [161]:
df['Installs'] = df['Installs'].astype(int)

In [166]:
index = df['Installs'].sort_values(ascending=False).head(5).index
df.iloc[index]['App']

3896                              Subway Surfers
3943                                    Facebook
335     Messenger – Text and Video Chat for Free
3523                                Google Drive
3565                                Google Drive
Name: App, dtype: object