# Electricity Generation Data Analysis

This project explores electricity generation data to uncover trends, patterns, and key insights using visualizations
We will load the dataset, perform data cleaning, and generate graphs to support our analysis.

In [23]:
import pandas as pd
import matplotlib.pyplot as plt

In [37]:
df = pd.read_csv('electricty.csv', encoding='utf-16', sep='\t')

In [38]:
# Display first few rows
df.head(10)

Unnamed: 0,Year of Period,Month of Period,Unit,Coal,Combust. Renew.,Hydro,Natural Gas,Oil,Other,Peat & BM,Solar Farms,Wastes,Wind
0,2024,January,GWh,99,13,132,1323,28,1,39,17,56,1142
1,2024,February,GWh,86,12,114,992,11,1,54,20,50,1197
2,2024,March,GWh,87,14,117,1020,6,1,49,35,23,1331
3,2024,April,GWh,83,12,89,1072,5,1,52,65,39,929
4,2024,May,GWh,25,12,35,1350,25,1,59,94,56,560
5,2024,June,GWh,72,11,15,1045,3,1,49,101,55,640
6,2024,July,GWh,127,12,11,1166,13,1,31,115,52,587
7,2024,August,GWh,69,12,31,964,3,1,1,98,56,948
8,2024,September,GWh,85,11,23,1167,49,1,1,74,53,795
9,2024,October,GWh,87,13,48,1216,15,1,40,54,56,1032


In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Year of Period   180 non-null    int64 
 1   Month of Period  180 non-null    object
 2   Unit             180 non-null    object
 3   Coal             180 non-null    int64 
 4   Combust. Renew.  180 non-null    int64 
 5   Hydro            180 non-null    int64 
 6   Natural Gas      180 non-null    object
 7   Oil              180 non-null    int64 
 8   Other            180 non-null    int64 
 9   Peat & BM        180 non-null    int64 
 10  Solar Farms      180 non-null    int64 
 11  Wastes           180 non-null    int64 
 12  Wind             180 non-null    object
dtypes: int64(9), object(4)
memory usage: 18.4+ KB


In [40]:
# Check for missing values
df.isnull().sum()

Year of Period     0
Month of Period    0
Unit               0
Coal               0
Combust. Renew.    0
Hydro              0
Natural Gas        0
Oil                0
Other              0
Peat & BM          0
Solar Farms        0
Wastes             0
Wind               0
dtype: int64

In [41]:
# Convert date columns if any exist
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])

In [42]:
df.head(10)

Unnamed: 0,Year of Period,Month of Period,Unit,Coal,Combust. Renew.,Hydro,Natural Gas,Oil,Other,Peat & BM,Solar Farms,Wastes,Wind
0,2024,January,GWh,99,13,132,1323,28,1,39,17,56,1142
1,2024,February,GWh,86,12,114,992,11,1,54,20,50,1197
2,2024,March,GWh,87,14,117,1020,6,1,49,35,23,1331
3,2024,April,GWh,83,12,89,1072,5,1,52,65,39,929
4,2024,May,GWh,25,12,35,1350,25,1,59,94,56,560
5,2024,June,GWh,72,11,15,1045,3,1,49,101,55,640
6,2024,July,GWh,127,12,11,1166,13,1,31,115,52,587
7,2024,August,GWh,69,12,31,964,3,1,1,98,56,948
8,2024,September,GWh,85,11,23,1167,49,1,1,74,53,795
9,2024,October,GWh,87,13,48,1216,15,1,40,54,56,1032
