In [14]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, mode

# Load dataset from Excel file
df = pd.read_excel(r'C:\Users\palni\Downloads\Project 1\Data-set.xlsx')

# Select numeric columns only
numeric_data = df.select_dtypes(include=[np.number])

# Dictionary to store EDA results
eda_results = {}

for col in numeric_data.columns:
    col_data = numeric_data[col].dropna()
    
    # Calculate mode (handle empty cases)
    try:
        mode_value = mode(col_data).mode[0]
    except IndexError:
        mode_value = np.nan
    
    eda_results[col] = {
        'mean': col_data.mean(),
        'median': col_data.median(),
        'mode': mode_value,
        'variance': col_data.var(),
        'std_dev': col_data.std(),
        'range': col_data.max() - col_data.min(),
        'skewness': skew(col_data),
        'kurtosis': kurtosis(col_data)
    }

# Create summary DataFrame
eda_df = pd.DataFrame(eda_results).T

# Save summary to CSV
eda_df.to_csv('eda_summary.csv')

# Display the first few rows
print(eda_df.head())


                                            mean  median  mode     variance  \
Coal_RB_4800_FOB_London_Close_USD      87.953184   76.19   NaN  2237.618566   
Coal_RB_5500_FOB_London_Close_USD     113.353155   95.80   NaN  3982.292145   
Coal_RB_5700_FOB_London_Close_USD     134.971807  108.91   NaN  6001.971375   
Coal_RB_6000_FOB_CurrentWeek_Avg_USD  144.351494  115.27   NaN  6972.281113   
Coal_India_5500_CFR_London_Close_USD  123.759710  111.20   NaN  3192.491618   

                                        std_dev   range  skewness  kurtosis  
Coal_RB_4800_FOB_London_Close_USD     47.303473  274.34  1.310980  2.100975  
Coal_RB_5500_FOB_London_Close_USD     63.105405  325.68  1.258757  1.116440  
Coal_RB_5700_FOB_London_Close_USD     77.472391  367.66  1.276265  0.802399  
Coal_RB_6000_FOB_CurrentWeek_Avg_USD  83.500186  383.69  1.267731  0.708799  
Coal_India_5500_CFR_London_Close_USD  56.502138  271.08  0.861061  0.169605  
