In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def read_worldbank_data(filename):
    """Load a World Bank style CSV and return it in two orientations.

    Parameters
    ----------
    filename : str, path-like or file-like
        Passed straight to ``pandas.read_csv``. The first four lines are
        skipped before the header row is read.

    Returns
    -------
    years_df : pandas.DataFrame
        Transposed table: one row per CSV column other than the first (in
        file order) and one column per CSV record, labelled with the values
        of the first CSV column. Column labels therefore repeat when the
        first column holds repeated values.
    countries_df : pandas.DataFrame
        The table in its file orientation (one row per CSV record) with the
        dtypes inferred by ``read_csv`` preserved, so numeric columns can be
        used in numeric operations without conversion.
    """
    worldbank_df = pd.read_csv(filename, skiprows=4)
    key_column = worldbank_df.columns[0]

    # Remaining CSV columns become rows; the first column's values become
    # the column labels (the columns axis is named after that first column).
    years_df = worldbank_df.set_index(key_column).T

    # Transposing years_df back would coerce every column to object dtype,
    # so the record-oriented frame is taken from the parsed CSV instead.
    # The rename is kept for backward compatibility: a column literally
    # called 'index' is still exposed as 'Country Name'.
    countries_df = worldbank_df.rename(columns={'index': 'Country Name'})
    return years_df, countries_df

In [None]:
# Raw string: the path is spelled with single backslashes and none of them is
# treated as an escape (the previous literal contained the invalid escape
# sequence "\A"). The resulting path is unchanged.
# NOTE(review): this is a machine-specific absolute path; point it at the
# local copy of the CSV before running.
filename = r"C:\Users\deepi\Downloads\API_19_DS2_en_csv_v2_6224512\API_19_DS2_en_csv_v2_6224512.csv"
years_data, countries_data = read_worldbank_data(filename)

In [None]:
# Show the last five rows of the year-oriented table.
years_data.tail(n=5)

In [None]:
# Remove artifact rows by label instead of by position. pandas names a
# header-less CSV column "Unnamed: N"; presumably that is the last row the
# positional drop was aimed at -- confirm with the tail() output above.
# Matching on the label makes the cell idempotent: running it again never
# removes a real year.
artifact_rows = [label for label in years_data.index if str(label).startswith('Unnamed')]
years_data = years_data.drop(index=artifact_rows)

In [None]:
# Re-inspect the last five rows after the trailing row was dropped.
years_data.tail(n=5)

In [None]:
# Fill missing yearly values with each column's median.
#
# Every column of years_data holds text metadata rows next to the yearly
# numbers, so DataFrame.median() on the whole frame raises TypeError on
# pandas >= 2.0. The medians are therefore computed on the year rows only
# (rows whose label is all digits). The work is done positionally on a NumPy
# copy because the column labels repeat, which rules out label-aligned fills.
is_year_row = np.array([str(label).isdigit() for label in years_data.index], dtype=bool)
table = years_data.to_numpy(dtype=object, copy=True)
year_block = table[is_year_row]
year_values = (
    pd.to_numeric(year_block.ravel(), errors='coerce')
    .astype(float)
    .reshape(year_block.shape)
)
# pandas' median skips NaN and yields NaN for an all-missing column, so such
# columns simply stay missing.
column_medians = pd.DataFrame(year_values).median().to_numpy()
table[is_year_row] = np.where(np.isnan(year_values), column_medians, year_values)
# Rebuild the frame with its original labels; rebinding (rather than
# mutating in place) keeps the cell safe to re-run.
years_data = pd.DataFrame(table, index=years_data.index, columns=years_data.columns)

In [None]:
# Show the first five rows of the record-oriented table.
countries_data.head(n=5)

In [None]:
# Remove artifact columns by label instead of by position. pandas names a
# header-less CSV column "Unnamed: N"; presumably that is the last column the
# positional slice was aimed at -- confirm with the head() output above.
# Matching on the label makes the cell idempotent (a re-run never removes a
# real year), and .copy() gives later cells an independent frame to modify.
is_artifact_column = countries_data.columns.astype(str).str.startswith('Unnamed')
countries_data = countries_data.loc[:, ~is_artifact_column].copy()

In [None]:
# Fill missing yearly values with the median of each year column.
#
# countries_data also carries text columns, so DataFrame.median() on the whole
# frame raises TypeError on pandas >= 2.0. Restrict the computation to the
# year columns (labels made only of digits) and convert them to numbers
# explicitly instead of relying on implicit downcasting during fillna.
year_columns = [column for column in countries_data.columns if str(column).isdigit()]
year_values = countries_data[year_columns].apply(pd.to_numeric, errors='coerce')
# NOTE(review): as before, each median is taken over every row of a year
# column, i.e. across all countries and indicators together -- confirm that
# a per-indicator median is not what is wanted.
countries_data = countries_data.copy()
countries_data[year_columns] = year_values.fillna(year_values.median())

In [None]:
# Re-inspect the first five rows after the column drop and median fill.
countries_data.head(n=5)

In [None]:
# List the distinct indicator names available for the analysis below.
countries_data.loc[:, 'Indicator Name'].unique()

In [None]:
# Descriptive statistics of the oil-share indicator for the four comparison
# countries.
selected_countries = ['United Kingdom', 'United States', 'China', 'India']
is_oil_indicator = countries_data['Indicator Name'] == 'Electricity production from oil sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_oil_indicator & is_selected_country]
# Skip the first four columns (presumably the identifying columns; the rest
# are summarised).
selected_data = filtered_data.iloc[:, 4:]
selected_columns = selected_data.columns
summary_statistics = selected_data.describe()
print("Summary Statistics for Electricity production from oil sources (% of total):")
summary_statistics

In [None]:
# Grouped bar chart of the oil-share indicator for the four comparison
# countries.
selected_countries = ['United Kingdom', 'United States', 'China','India']
is_oil_indicator = countries_data['Indicator Name'] == 'Electricity production from oil sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_oil_indicator & is_selected_country]
# Every 10th column from position 24, stopping 3 short of the end --
# presumably the decade years from 1980 on; verify against the column layout.
selected_columns = filtered_data.columns[24:-3:10]
values_by_year = filtered_data.set_index('Country Name')[selected_columns].T
fig, ax = plt.subplots(figsize=(10, 6))
values_by_year.plot(kind='bar', ax=ax)
ax.set_title('Bar Plot for Electricity production from oil sources (% of total) Since 1980')
ax.set_xlabel('Years')
ax.set_ylabel('Value')
# Place the legend outside the axes, to the right of the bars.
ax.legend(title='Country Name', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Descriptive statistics of the coal-share indicator for the four comparison
# countries.
selected_countries = ['United Kingdom', 'United States', 'China', 'India']
is_coal_indicator = countries_data['Indicator Name'] == 'Electricity production from coal sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_coal_indicator & is_selected_country]
# Skip the first four columns (presumably the identifying columns; the rest
# are summarised).
selected_data = filtered_data.iloc[:, 4:]
selected_columns = selected_data.columns
summary_statistics = selected_data.describe()
print("Summary Statistics for Electricity production from coal sources (% of total):")
summary_statistics

In [None]:
# Grouped bar chart of the coal-share indicator for the four comparison
# countries.
selected_countries = ['United Kingdom', 'United States', 'China','India']
is_coal_indicator = countries_data['Indicator Name'] == 'Electricity production from coal sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_coal_indicator & is_selected_country]
# Every 10th column from position 24, stopping 3 short of the end --
# presumably the decade years from 1980 on; verify against the column layout.
selected_columns = filtered_data.columns[24:-3:10]
values_by_year = filtered_data.set_index('Country Name')[selected_columns].T
fig, ax = plt.subplots(figsize=(10, 6))
values_by_year.plot(kind='bar', ax=ax)
ax.set_title('Bar Plot for Electricity production from coal sources (% of total) Since 1980')
ax.set_xlabel('Years')
ax.set_ylabel('Value')
# Place the legend outside the axes, to the right of the bars.
ax.legend(title='Country Name', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Grouped bar chart of the nuclear-share indicator for the four comparison
# countries.
selected_countries = ['United Kingdom', 'United States', 'China','India']
is_nuclear_indicator = countries_data['Indicator Name'] == 'Electricity production from nuclear sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_nuclear_indicator & is_selected_country]
# Every 10th column from position 24, stopping 3 short of the end --
# presumably the decade years from 1980 on; verify against the column layout.
selected_columns = filtered_data.columns[24:-3:10]
values_by_year = filtered_data.set_index('Country Name')[selected_columns].T
fig, ax = plt.subplots(figsize=(10, 6))
values_by_year.plot(kind='bar', ax=ax)
ax.set_title('Bar Plot for Electricity production from nuclear sources (% of total) Since 1980')
ax.set_xlabel('Years')
ax.set_ylabel('Value')
# Place the legend outside the axes, to the right of the bars.
ax.legend(title='Country Name', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Grouped bar chart of the natural-gas-share indicator for the four
# comparison countries.
selected_countries = ['United Kingdom', 'United States', 'China','India']
is_gas_indicator = countries_data['Indicator Name'] == 'Electricity production from natural gas sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_gas_indicator & is_selected_country]
# Every 10th column from position 24, stopping 3 short of the end --
# presumably the decade years from 1980 on; verify against the column layout.
selected_columns = filtered_data.columns[24:-3:10]
values_by_year = filtered_data.set_index('Country Name')[selected_columns].T
fig, ax = plt.subplots(figsize=(10, 6))
values_by_year.plot(kind='bar', ax=ax)
ax.set_title('Bar Plot for Electricity production from natural gas sources (% of total) Since 1980')
ax.set_xlabel('Years')
ax.set_ylabel('Value')
# Place the legend outside the axes, to the right of the bars.
ax.legend(title='Country Name', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Grouped bar chart of the hydroelectric-share indicator for the four
# comparison countries.
selected_countries = ['United Kingdom', 'United States', 'China','India']
is_hydro_indicator = countries_data['Indicator Name'] == 'Electricity production from hydroelectric sources (% of total)'
is_selected_country = countries_data['Country Name'].isin(selected_countries)
filtered_data = countries_data[is_hydro_indicator & is_selected_country]
# Every 10th column from position 24, stopping 3 short of the end --
# presumably the decade years from 1980 on; verify against the column layout.
selected_columns = filtered_data.columns[24:-3:10]
values_by_year = filtered_data.set_index('Country Name')[selected_columns].T
fig, ax = plt.subplots(figsize=(10, 6))
values_by_year.plot(kind='bar', ax=ax)
ax.set_title('Bar Plot for Electricity production from hydroelectric sources (% of total) Since 1980')
ax.set_xlabel('Years')
ax.set_ylabel('Value')
# Place the legend outside the axes, to the right of the bars.
ax.legend(title='Country Name', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Electricity-source indicators used by the pie charts, in display order.
selected_indicators = [
    'Electricity production from oil sources (% of total)',
    'Electricity production from nuclear sources (% of total)',
    'Electricity production from natural gas sources (% of total)',
    'Electricity production from hydroelectric sources (% of total)',
    'Electricity production from coal sources (% of total)',
]

# Short wedge label for each indicator, paired position by position with the
# list above.
source_names = dict(zip(
    selected_indicators,
    [
        'Oil Sources',
        'Nuclear Sources',
        'Natural Gas Sources',
        'Hydroelectric Sources',
        'Coal Sources',
    ],
))

In [None]:
# Side-by-side pie charts of the United Kingdom's electricity mix in 1980 and
# 2015.
selected_data = countries_data[(countries_data['Country Name'] == 'United Kingdom') & (countries_data['Indicator Name'].isin(selected_indicators))]
# Take each wedge label from its own row, so labels cannot drift out of step
# with the values when the rows are not in the order of selected_indicators.
wedge_labels = selected_data['Indicator Name'].map(source_names).tolist()
# Address the years by column label; positional offsets silently point at a
# different year whenever the column layout changes.
year1980_column = selected_data['1980'].to_numpy(dtype=float)
year2015_column = selected_data['2015'].to_numpy(dtype=float)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].pie(year1980_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[0].set_title('Electricity Production Composition in 1980')
axes[1].pie(year2015_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[1].set_title('Electricity Production Composition in 2015')
fig.suptitle('% of Electricity Production from Different Sources in United Kingdom', fontsize=16, y=1.05)
plt.show()

In [None]:
# Side-by-side pie charts of India's electricity mix in 1980 and 2015.
selected_data = countries_data[(countries_data['Country Name'] == 'India') & (countries_data['Indicator Name'].isin(selected_indicators))]
# Take each wedge label from its own row, so labels cannot drift out of step
# with the values when the rows are not in the order of selected_indicators.
wedge_labels = selected_data['Indicator Name'].map(source_names).tolist()
# Address the years by column label; positional offsets silently point at a
# different year whenever the column layout changes.
year1980_column = selected_data['1980'].to_numpy(dtype=float)
year2015_column = selected_data['2015'].to_numpy(dtype=float)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].pie(year1980_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[0].set_title('Electricity Production Composition in 1980')
axes[1].pie(year2015_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[1].set_title('Electricity Production Composition in 2015')
fig.suptitle('% of Electricity Production from Different Sources in India', fontsize=16, y=1.05)
plt.show()

In [None]:
# Side-by-side pie charts of the United States' electricity mix in 1980 and
# 2015.
selected_data = countries_data[(countries_data['Country Name'] == 'United States') & (countries_data['Indicator Name'].isin(selected_indicators))]
# Take each wedge label from its own row, so labels cannot drift out of step
# with the values when the rows are not in the order of selected_indicators.
wedge_labels = selected_data['Indicator Name'].map(source_names).tolist()
# Address the years by column label; positional offsets silently point at a
# different year whenever the column layout changes.
year1980_column = selected_data['1980'].to_numpy(dtype=float)
year2015_column = selected_data['2015'].to_numpy(dtype=float)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].pie(year1980_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[0].set_title('Electricity Production Composition in 1980')
axes[1].pie(year2015_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[1].set_title('Electricity Production Composition in 2015')
fig.suptitle('% of Electricity Production from Different Sources in United States', fontsize=16, y=1.05)
plt.show()

In [None]:
# Side-by-side pie charts of China's electricity mix in 1980 and 2015.
selected_data = countries_data[(countries_data['Country Name'] == 'China') & (countries_data['Indicator Name'].isin(selected_indicators))]
# Take each wedge label from its own row, so labels cannot drift out of step
# with the values when the rows are not in the order of selected_indicators.
wedge_labels = selected_data['Indicator Name'].map(source_names).tolist()
# Address the years by column label; positional offsets silently point at a
# different year whenever the column layout changes.
year1980_column = selected_data['1980'].to_numpy(dtype=float)
year2015_column = selected_data['2015'].to_numpy(dtype=float)
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].pie(year1980_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[0].set_title('Electricity Production Composition in 1980')
axes[1].pie(year2015_column, autopct='%1.1f%%', startangle=140, labels=wedge_labels)
axes[1].set_title('Electricity Production Composition in 2015')
fig.suptitle('% of Electricity Production from Different Sources in China', fontsize=16, y=1.05)
plt.show()

In [None]:
import seaborn as sns

# Indicators whose pairwise correlations are shown in the heatmaps.
indicators_to_plot = [
    'Electricity production from oil sources (% of total)',
    'Electricity production from nuclear sources (% of total)',
    'Electricity production from natural gas sources (% of total)',
    'Electricity production from hydroelectric sources (% of total)',
    'Electricity production from coal sources (% of total)',
    'Access to electricity (% of population)',
    'Population growth (annual %)',
    'SF6 gas emissions (thousand metric tons of CO2 equivalent)',
    'PFC gas emissions (thousand metric tons of CO2 equivalent)']

uk_data = countries_data[(countries_data['Country Name'] == 'United Kingdom') & (countries_data['Indicator Name'].isin(indicators_to_plot))]

# Select the year columns by label. Slicing with iloc[:, 4:] after set_index
# skipped one column too many, because moving 'Indicator Name' into the index
# shifts every remaining column one position to the left.
year_columns = [column for column in uk_data.columns if str(column).isdigit()]
# Rows = years, columns = indicators, so corr() compares indicators over time.
heatmap_data = uk_data.set_index('Indicator Name')[year_columns].astype(float).transpose()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_data.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5, ax=ax)
ax.set_title('Correlation Heatmap for Indicators in the United Kingdom')
plt.show()

In [None]:
# NOTE(review): this cell draws the same United Kingdom heatmap as the cell
# before it -- confirm whether another country was intended here.
uk_data = countries_data[(countries_data['Country Name'] == 'United Kingdom') & (countries_data['Indicator Name'].isin(indicators_to_plot))]

# Select the year columns by label. Slicing with iloc[:, 4:] after set_index
# skipped one column too many, because moving 'Indicator Name' into the index
# shifts every remaining column one position to the left.
year_columns = [column for column in uk_data.columns if str(column).isdigit()]
# Rows = years, columns = indicators, so corr() compares indicators over time.
heatmap_data = uk_data.set_index('Indicator Name')[year_columns].astype(float).transpose()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_data.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5, ax=ax)
ax.set_title('Correlation Heatmap for Indicators in the United Kingdom')
plt.show()

In [None]:
# Correlation heatmap of the selected indicators for the United States.
us_data = countries_data[(countries_data['Country Name'] == 'United States') & (countries_data['Indicator Name'].isin(indicators_to_plot))]

# Select the year columns by label. Slicing with iloc[:, 4:] after set_index
# skipped one column too many, because moving 'Indicator Name' into the index
# shifts every remaining column one position to the left.
year_columns = [column for column in us_data.columns if str(column).isdigit()]
# Rows = years, columns = indicators, so corr() compares indicators over time.
heatmap_data = us_data.set_index('Indicator Name')[year_columns].astype(float).transpose()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_data.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5, ax=ax)
ax.set_title('Correlation Heatmap for Indicators in the United States')
plt.show()