# In [None]:
# Flight Price Dataset Analysis

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the flight price dataset
def load_flight_price_data(filepath):
    """Read the flight price CSV at ``filepath`` into a DataFrame.

    Args:
        filepath: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    flights = pd.read_csv(filepath)
    return flights

# Q1: Examine dimensions
def get_dimensions(df):
    """Return the ``shape`` attribute of ``df``.

    Args:
        df: Object exposing a ``shape`` attribute (e.g. a DataFrame).

    Returns:
        Whatever ``df.shape`` holds; for a DataFrame this is
        ``(n_rows, n_columns)``.
    """
    dimensions = df.shape
    return dimensions

# Q2: Plot histogram for price distribution
def plot_price_distribution(df):
    """Show a 30-bin histogram of the ``'Price'`` column.

    Args:
        df: DataFrame with a ``'Price'`` column.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    prices = df['Price']
    plt.hist(prices, bins=30, edgecolor='k')
    # Labelling happens after the histogram has been drawn on the axes.
    plt.xlabel("Price")
    plt.ylabel("Frequency")
    plt.title("Price Distribution")
    plt.show()

# Q3: Get price range
def get_price_range(df):
    """Return the smallest and largest values of the ``'Price'`` column.

    Args:
        df: DataFrame with a ``'Price'`` column.

    Returns:
        A ``(minimum, maximum)`` tuple.
    """
    prices = df['Price']
    lowest = prices.min()
    highest = prices.max()
    return lowest, highest

# Q4: Boxplot of prices by airline
def plot_prices_by_airline(df):
    """Show a box plot of ``'Price'`` grouped by ``'Airline'``.

    Args:
        df: DataFrame with ``'Airline'`` and ``'Price'`` columns.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df, x='Airline', y='Price')
    plt.title("Flight Prices by Airline")
    # Tilt the tick labels on the x-axis by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()

# Q5: Identify and discuss outliers
def identify_outliers(df):
    """Show a box plot of ``'Price'`` and return its quartiles.

    Args:
        df: DataFrame with a ``'Price'`` column.

    Returns:
        A ``(q1, q3)`` tuple holding the 0.25 and 0.75 quantiles of
        ``'Price'``. Outliers are only shown in the plot; they are not
        returned.
    """
    prices = df['Price']
    sns.boxplot(x=prices)
    plt.title("Outlier Detection")
    plt.show()

    # Quartiles are computed after the plot has been shown.
    first_quartile = prices.quantile(0.25)
    third_quartile = prices.quantile(0.75)
    return first_quartile, third_quartile

# Q6: Analyze peak travel season
def analyze_peak_season(df):
    """Plot the number of rows per calendar month of the ``'Date'`` column.

    Args:
        df: DataFrame with a ``'Date'`` column that ``pandas.to_datetime``
            can parse.

    Side effects:
        Writes (or overwrites) a ``'Month'`` column on ``df`` in place, then
        displays the figure with ``plt.show()``. Nothing is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Month'] = parsed_dates.dt.month

    # One count per month number; rows from different years share a bucket.
    flights_per_month = df.groupby('Month').size()
    months = flights_per_month.index
    counts = flights_per_month.values

    plt.plot(months, counts)
    plt.title("Monthly Flight Trends")
    plt.xlabel("Month")
    plt.ylabel("Number of Flights")
    plt.show()

# Q7: Analyze trends in flight prices
def analyze_price_trends(df):
    """Show a seaborn line plot of ``'Price'`` against month of ``'Date'``.

    Args:
        df: DataFrame with ``'Date'`` and ``'Price'`` columns; ``'Date'``
            must be parseable by ``pandas.to_datetime``.

    Side effects:
        Writes (or overwrites) a ``'Month'`` column on ``df`` in place, then
        displays the figure with ``plt.show()``. Nothing is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Month'] = parsed_dates.dt.month
    sns.lineplot(data=df, x='Month', y='Price')
    plt.title("Flight Price Trends by Month")
    plt.show()

# Q8: Factors affecting flight prices
def analyze_price_factors(df):
    """Show an annotated correlation heatmap of the numeric columns of ``df``.

    Only numeric columns take part in the correlation. Since pandas 2.0,
    ``DataFrame.corr()`` no longer drops non-numeric columns by default and
    raises on string-valued columns, so the restriction is requested
    explicitly here.

    Args:
        df: DataFrame whose numeric columns are correlated pairwise.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # The numeric_only keyword requires pandas >= 1.5.
    correlations = df.corr(numeric_only=True)
    sns.heatmap(correlations, annot=True, cmap='coolwarm')
    plt.title("Correlation Heatmap")
    plt.show()

# Google Playstore Dataset Analysis

# Load the Google Playstore dataset
def load_playstore_data(filepath):
    """Read the Google Playstore CSV at ``filepath`` into a DataFrame.

    Args:
        filepath: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    apps = pd.read_csv(filepath)
    return apps

# Q9: Examine dimensions
def get_playstore_dimensions(df):
    """Return the ``shape`` attribute of ``df``.

    Args:
        df: Object exposing a ``shape`` attribute (e.g. a DataFrame).

    Returns:
        Whatever ``df.shape`` holds; for a DataFrame this is
        ``(n_rows, n_columns)``.
    """
    dimensions = df.shape
    return dimensions

# Q10: Boxplot of ratings by category
def plot_ratings_by_category(df):
    """Show a box plot of ``'Rating'`` grouped by ``'Category'``.

    Args:
        df: DataFrame with ``'Category'`` and ``'Rating'`` columns.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(12, 8))
    sns.boxplot(data=df, x='Category', y='Rating')
    plt.title("Ratings by Category")
    # Tilt the tick labels on the x-axis by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()

# Q11: Check for missing values
def check_missing_values(df):
    """Count the missing entries in every column of ``df``.

    Args:
        df: DataFrame to inspect.

    Returns:
        A Series indexed by column name whose values are the number of
        null entries in that column.
    """
    null_mask = df.isnull()
    missing_per_column = null_mask.sum()
    return missing_per_column

# Q12: Relationship between size and rating
def plot_size_vs_rating(df):
    """Show a scatter plot of ``'Rating'`` against ``'Size'``.

    Args:
        df: DataFrame with ``'Size'`` and ``'Rating'`` columns; both are
            passed to ``plt.scatter`` unchanged.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    sizes = df['Size']
    ratings = df['Rating']
    plt.scatter(sizes, ratings)
    plt.xlabel("Size")
    plt.ylabel("Rating")
    plt.title("Size vs Rating")
    plt.show()

# Q13: Average prices by app type
def plot_prices_by_type(df):
    """Show a seaborn bar plot of ``'Price'`` for each ``'Type'``.

    Args:
        df: DataFrame with ``'Type'`` and ``'Price'`` columns.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    sns.barplot(data=df, x='Type', y='Price')
    plt.title("Average Prices by App Type")
    plt.show()

# Q14: Top 10 most popular apps
def top_popular_apps(df):
    """Return the ten apps with the largest summed ``'Installs'``.

    Args:
        df: DataFrame with ``'App'`` and ``'Installs'`` columns.

    Returns:
        A Series indexed by app name holding the per-app install totals,
        limited to the ten largest and ordered from largest to smallest.
    """
    installs_by_app = df.groupby('App')['Installs']
    total_installs = installs_by_app.sum()
    return total_installs.nlargest(10)

# Q15: Popular app categories
def analyze_popular_categories(df):
    """Show a bar chart of row counts per ``'Category'``, largest first.

    Args:
        df: DataFrame with a ``'Category'`` column.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    apps_per_category = df.groupby('Category').size()
    ranked = apps_per_category.sort_values(ascending=False)
    ranked.plot(kind='bar')
    plt.title("Popular App Categories")
    plt.show()

# Q16: Successful app developers
def analyze_successful_developers(df):
    """Show a bar chart of the ten developers with the most installs.

    Args:
        df: DataFrame with ``'Developer'`` and ``'Installs'`` columns.

    ``'Installs'`` is summed per developer, sorted in descending order and
    cut to the first ten entries before plotting. The figure is displayed
    with ``plt.show()``; nothing is returned.
    """
    installs_per_developer = df.groupby('Developer')['Installs'].sum()
    ranked = installs_per_developer.sort_values(ascending=False)
    top_ten = ranked.head(10)
    top_ten.plot(kind='bar')
    plt.title("Top Developers by Installs")
    plt.show()

# Q17: Best time to launch an app
def analyze_best_launch_time(df):
    """Show a line chart of row counts per year of ``'Last Updated'``.

    Args:
        df: DataFrame with a ``'Last Updated'`` column that
            ``pandas.to_datetime`` can parse.

    Side effects:
        Writes (or overwrites) a ``'Year'`` column on ``df`` in place, then
        displays the figure with ``plt.show()``. Nothing is returned.
    """
    last_updated = pd.to_datetime(df['Last Updated'])
    df['Year'] = last_updated.dt.year
    updates_per_year = df.groupby('Year').size()
    updates_per_year.plot(kind='line')
    plt.title("Trends in App Updates")
    plt.show()
