In [2]:
import pandas as pd

# Load the dataset
data = pd.read_csv("/content/googleplaystore.csv")

# Display first and last few rows
data.head()
data.tail(3)

# Dataset shape
print("Number of rows:", data.shape[0])
print("Number of columns:", data.shape[1])

# Dataset information and summary
data.info()
data.describe(include="all")

# Columns list
data.columns

# Extracting 'App' column and filtering for 'Astrology' apps
data['App']
data['App'].str.contains('Astrology')  # Case-sensitive
data['App'].str.contains('Astrology', case=False)  # Case-insensitive
filtered_apps = data[data['App'].str.contains('Astrology', case=False)]
len(filtered_apps)

# Analyzing Ratings
data['Rating']
data['Rating'].mean()

# Categories
data['Category']
data['Category'].unique()
len(data['Category'].unique())
data['Category'].nunique()

# Finding the Category with the Highest Average Rating
data.groupby('Category')['Rating'].mean().sort_values(ascending=False)

# Finding the Total Number of Apps with a 5-Star Rating
five_star_apps = data[data['Rating'] == 5.0]
len(five_star_apps)

# Converting 'Reviews' to Numeric
data['Reviews'] = data['Reviews'].replace('3.0M', '3.0')
data['Reviews'] = data['Reviews'].astype(float)
data['Reviews'].mean()

# Counting Free and Paid Apps
data['Type'].value_counts()

# Finding the App with the Maximum Reviews
max_reviews_app = data.loc[data['Reviews'].idxmax(), 'App']
print("App with maximum reviews:", max_reviews_app)

# Displaying Top 5 Apps with Highest Reviews
top_5_reviews = data.nlargest(5, 'Reviews')['App']
print("Top 5 Apps with highest reviews:", top_5_reviews.tolist())

# Finding the Average Rating of Free and Paid Apps
data.groupby('Type')['Rating'].mean()

# Cleaning 'Installs' Column
data['Installs'] = data['Installs'].str.replace(',', '').str.replace('+', '').replace('Free', '0')
data['Installs'] = data['Installs'].astype(float)

# Displaying Top 5 Apps with Maximum Installs
top_5_installs = data.nlargest(5, 'Installs')['App']
print("Top 5 Apps with maximum installs:", top_5_installs.tolist())


Number of rows: 10841
Number of columns: 14
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      10841 non-null  int64  
 1   App             10841 non-null  object 
 2   Category        10841 non-null  object 
 3   Rating          9367 non-null   float64
 4   Reviews         10841 non-null  object 
 5   Size            10841 non-null  object 
 6   Installs        10841 non-null  object 
 7   Type            10840 non-null  object 
 8   Price           10841 non-null  object 
 9   Content Rating  10840 non-null  object 
 10  Genres          10841 non-null  object 
 11  Last Updated    10841 non-null  object 
 12  Current Ver     10833 non-null  object 
 13  Android Ver     10838 non-null  object 
dtypes: float64(1), int64(1), object(12)
memory usage: 1.2+ MB
App with maximum reviews: Facebook
Top 5 Apps with highest revie