In [3]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Location of the CSV file to analyse (a raw GitHub URL); it is passed to
# load_data() by the driver cell at the bottom of the notebook.
DATA_URL = "https://raw.githubusercontent.com/FairozAhmadSheikh/Datasets_CSV/refs/heads/main/Tips.csv"

In [9]:
def load_data(url):
    """Read the CSV found at ``url`` into a pandas DataFrame.

    A progress line is printed before the read and a status line after it.

    Args:
        url: Anything ``pd.read_csv`` accepts as its first argument
            (the notebook passes an HTTP URL).

    Returns:
        The parsed DataFrame, or ``None`` when reading raised an exception
        (the error is printed rather than propagated).
    """
    print(f"Loading data from: {url}")
    try:
        frame = pd.read_csv(url)
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None
        # so the caller can decide whether to continue.
        print(f"Error loading data: {err}")
        return None
    print("Data loaded successfully.")
    return frame

In [None]:
# Columns every panel below reads from the input frame.
_REQUIRED_COLUMNS = ('total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size')


def _plot_tip_vs_bill(df, ax):
    """Scatter ``tip`` against ``total_bill`` on ``ax``, coloured by ``sex``."""
    sns.scatterplot(
        x='total_bill',
        y='tip',
        hue='sex',
        data=df,
        s=100,  # size of points
        alpha=0.7,
        palette={'Male': '#3498db', 'Female': '#e74c3c'},
        ax=ax
    )
    ax.set_title('Tip Amount vs. Total Bill (Colored by Sex)', fontsize=14)
    ax.set_xlabel('Total Bill ($)', fontsize=12)
    ax.set_ylabel('Tip ($)', fontsize=12)
    ax.legend(title='Sex')


def _plot_tip_by_day(df, ax):
    """Box plot of ``tip`` per ``day`` on ``ax`` with raw points overlaid."""
    day_order = ['Thur', 'Fri', 'Sat', 'Sun']
    # NOTE(review): seaborn 0.13 deprecates `palette` without `hue` (it asks
    # for hue=<x variable>, legend=False). Left unchanged because the installed
    # seaborn version is not visible here -- confirm before migrating.
    sns.boxplot(
        x='day',
        y='tip',
        data=df,
        order=day_order,
        palette='Set2',
        ax=ax
    )
    # Add data points over the box plot
    sns.stripplot(x='day', y='tip', data=df, order=day_order, color='0.25', size=4, ax=ax, jitter=True)
    ax.set_title('Tip Distribution by Day of the Week', fontsize=14)
    ax.set_xlabel('Day', fontsize=12)
    ax.set_ylabel('Tip ($)', fontsize=12)


def _plot_bill_by_time_and_smoker(df, ax):
    """Split violin plot of ``total_bill`` per ``time`` on ``ax``, split by ``smoker``."""
    sns.violinplot(
        x='time',
        y='total_bill',
        hue='smoker',
        data=df,
        split=True,
        palette={'Yes': '#2ecc71', 'No': '#9b59b6'},
        ax=ax
    )
    ax.set_title('Total Bill Distribution by Time and Smoker Status', fontsize=14)
    ax.set_xlabel('Time of Day', fontsize=12)
    ax.set_ylabel('Total Bill ($)', fontsize=12)
    ax.legend(title='Smoker')


def _plot_party_size_counts(df, ax):
    """Bar chart on ``ax`` counting rows for each value of ``size``."""
    # NOTE(review): same `palette`-without-`hue` deprecation caveat as the
    # box plot -- confirm seaborn version before changing.
    sns.countplot(
        x='size',
        data=df,
        palette='viridis',
        ax=ax
    )
    ax.set_title('Count of Parties by Size', fontsize=14)
    ax.set_xlabel('Party Size', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)


def analyze_and_visualize(df):
    """Print a quick summary of ``df`` and draw a 2x2 grid of plots.

    The console output is the first rows (``df.head()``) followed by
    ``df.info()``. The figure contains, left-to-right and top-to-bottom:
    tip vs. total bill (by sex), tip by day, total bill by time and smoker
    status, and a count of rows per party size.

    Side effects: switches the global matplotlib style to
    ``'seaborn-v0_8-whitegrid'``, creates a new figure and calls ``plt.show()``.

    Args:
        df: DataFrame containing the columns ``total_bill``, ``tip``, ``sex``,
            ``smoker``, ``day``, ``time`` and ``size``.

    Raises:
        ValueError: If any of the required columns is absent. Checked up front
            so the failure is reported before any style change or figure
            creation, instead of surfacing from inside a plotting call.
    """
    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(
            f"DataFrame is missing required column(s): {', '.join(missing)}"
        )

    print("\n Initial Data Snapshot ")
    print(df.head())
    print("\n Data Info")
    df.info()

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(2, 2, figsize=(18, 14))
    fig.suptitle('Analysis of Restaurant Tips Dataset', fontsize=20, fontweight='bold', y=1.02)

    _plot_tip_vs_bill(df, axes[0, 0])
    _plot_tip_by_day(df, axes[0, 1])
    _plot_bill_by_time_and_smoker(df, axes[1, 0])
    _plot_party_size_counts(df, axes[1, 1])

    # Adjust layout to prevent overlap
    plt.tight_layout(rect=[0, 0.03, 1, 0.98])
    plt.show()


In [10]:
# Driver cell: fetch the CSV referenced by DATA_URL and, if that worked,
# print the summary and render the plots.
if __name__ == "__main__":
    data_frame = load_data(DATA_URL)
    
    # load_data() returns None after printing the error when the read fails,
    # so skip the analysis in that case.
    if data_frame is not None:
        analyze_and_visualize(data_frame)

Loading data from: https://raw.githubusercontent.com/FairozAhmadSheikh/Datasets_CSV/refs/heads/main/Tips.csv
Data loaded successfully.

 Initial Data Snapshot 
   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4

 Data Info
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), o