In [1]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Load the billing dataset into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='Electricity BILL.csv')



In [2]:
# Output folders, one per plot family.
plot_dirs = ['pair_plots', 'box_plots', 'violin_plots', 'count_plots', 'correlation_heatmaps']
for plot_dir in plot_dirs:
    # The loop variable is deliberately not called `dir`: at notebook top level
    # that would overwrite the built-in dir() for every later cell.
    # exist_ok=True keeps the cell idempotent on re-run without a separate
    # exists() check; it still raises if the path exists but is not a directory.
    os.makedirs(plot_dir, exist_ok=True)


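A pair plot shows the pairwise relationships between the numerical features, with each feature's own distribution on the diagonal.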

In [3]:
# sns.pairplot builds a grid of subplots and returns a PairGrid. plt.title()
# would only title the most recently created subplot, so the title is set on
# the figure itself instead.
pair_grid = sns.pairplot(data)
# y slightly above 1 places the title over the top row of panels rather than on it.
pair_grid.figure.suptitle('Pair Plots of Numerical Features', y=1.02)
# PairGrid.savefig saves with a tight bounding box, so the raised title is
# not cropped out of the image.
pair_grid.savefig('pair_plots/pair_plots.png')
plt.close(pair_grid.figure)


A box plot visualizes the distribution and detects outliers in numerical features.

In [4]:
# Column labels of every float64 / int64 column in the dataset.
numerical_features = data.select_dtypes(include=['float64', 'int64']).columns

# One box plot per numeric column, each written to its own PNG in box_plots/.
for column in numerical_features:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x=data[column], ax=ax)
    ax.set_title(f'Box Plot of {column}')
    fig.savefig(f'box_plots/box_plot_{column}.png')
    # Close explicitly so open figures do not pile up across iterations.
    plt.close(fig)


A violin plot shows the distribution of the data for numerical variables, combining the density plot and box plot.

In [5]:
# One violin plot per numeric column, each written to its own PNG in
# violin_plots/. `numerical_features` comes from the box-plot cell.
for column in numerical_features:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.violinplot(x=data[column], ax=ax)
    ax.set_title(f'Violin Plot of {column}')
    fig.savefig(f'violin_plots/violin_plot_{column}.png')
    # Close explicitly so open figures do not pile up across iterations.
    plt.close(fig)


Count plots are useful for visualizing the distribution of categorical features.

In [6]:
# Column labels of every object-dtype column in the dataset.
categorical_features = data.select_dtypes(include=['object']).columns

# One horizontal count plot per object column (categories on the y-axis),
# each written to its own PNG in count_plots/.
for column in categorical_features:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(y=data[column], ax=ax)
    ax.set_title(f'Count Plot of {column}')
    fig.savefig(f'count_plots/count_plot_{column}.png')
    # Close explicitly so open figures do not pile up across iterations.
    plt.close(fig)


A heatmap visualizes the correlation matrix of numerical variables.

In [7]:
# Keep only the float64 / int64 columns as input to the correlation.
numeric_data = data.select_dtypes(['float64', 'int64'])

# Pairwise Pearson correlation between those columns (pandas' default method,
# spelled out here for the reader).
correlation_matrix = numeric_data.corr(method='pearson')


In [8]:

# Draw the correlation matrix as an annotated heatmap and save it to disk.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,       # print the coefficient inside each cell
    cmap='coolwarm',
    fmt='.2f',        # two decimal places for the annotations
    ax=ax,
)
ax.set_title('Correlation Heatmap')
fig.savefig('correlation_heatmaps/correlation_heatmap.png')
plt.close(fig)
