# Exploratory Data Analysis
This notebook loads Data-set.xlsx from the repository, summarizes the data, checks for missing values, saves a CSV copy, and generates simple plots saved into the working directory.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the dataset from the raw GitHub URL and print a basic overview.
url = 'https://raw.githubusercontent.com/Harshitpal1/Data-Analyst--Project/main/Data-set.xlsx'

# Bind df before the attempt so that a failed download leaves a well-defined
# "not loaded" marker instead of an undefined name (NameError further down).
df = None
try:
    df = pd.read_excel(url)
    print('Loaded dataset from URL')
except Exception as e:
    # Best-effort load: report the problem and let the rest of the cell
    # degrade gracefully rather than aborting with a traceback.
    print('Failed to load from URL, please ensure the file exists in the repo or download manually:', e)

# Basic info
# getattr falls back to 'N/A' when df is None (load failed).
print('\nShape:', getattr(df, 'shape', 'N/A'))

if df is not None:
    print('\nColumns and dtypes:')
    print(df.dtypes)

    print('\nFirst 5 rows:')
    print(df.head())

In [None]:
# Summary statistics, missing values, CSV export and numeric histograms.
# The dataset is expected in the global name `df`, set by the loading cell.
# Look it up defensively so this cell reports a clear message instead of
# raising when the load cell was skipped or the download failed.
if globals().get('df') is None:
    print('Dataset is not loaded; run the loading cell successfully first')
else:
    # Summary statistics and missing values
    print('\nDescribe:\n', df.describe(include='all'))
    print('\nMissing values per column:\n', df.isnull().sum())

    # Save a CSV copy for convenience
    try:
        df.to_csv('Data-set.csv', index=False)
        print('Saved Data-set.csv')
    except Exception as e:
        # Best-effort export: a read-only working directory should not
        # prevent the plots below from being generated.
        print('Could not save CSV:', e)

    # Simple plots: histograms for numeric columns
    num_cols = df.select_dtypes(include=['number']).columns.tolist()
    if num_cols:
        df[num_cols].hist(figsize=(12, 8))
        plt.tight_layout()
        # The figure is intentionally left open after saving so the inline
        # backend can still render it at the end of the cell.
        plt.savefig('numeric_histograms.png')
        print('Saved numeric_histograms.png')
    else:
        print('No numeric columns to plot')