In [1]:
#import main libraries to be used in the project
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import os




In [14]:
#import the dataset
df = pd.read_csv('Data/nuclear_power_plants.csv')
df.head()

Unnamed: 0,Id,Name,Latitude,Longitude,Country,CountryCode,Status,ReactorType,ReactorModel,ConstructionStartAt,OperationalFrom,OperationalTo,Capacity,LastUpdatedAt,Source,IAEAId
0,1,Ågesta,59.206,18.0829,Sweden,SE,Shutdown,PHWR,,1957-12-01,1964-05-01,1974-06-02,9.0,2015-05-24T04:51:37+03:00,WNA/IAEA,528.0
1,2,Akademik Lomonosov-1,69.709579,170.30625,Russia,RU,Operational,PWR,KLT-40S 'Floating',2007-04-15,2020-05-22,,30.0,2021-05-31,WNA/IAEA/Google Maps,895.0
2,3,Akademik Lomonosov-2,69.709579,170.30625,Russia,RU,Operational,PWR,KLT-40S 'Floating',2007-04-15,2020-05-22,,30.0,2021-05-31,WNA/IAEA/Google Maps,896.0
3,4,Akhvaz-1,,,Iran,IR,Planned,,,,,,,,WNA,
4,5,Akhvaz-2,,,Iran,IR,Planned,,,,,,,,WNA,


In [15]:
df.shape

(803, 16)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 803 entries, 0 to 802
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Id                   803 non-null    int64  
 1   Name                 803 non-null    object 
 2   Latitude             759 non-null    float64
 3   Longitude            759 non-null    float64
 4   Country              803 non-null    object 
 5   CountryCode          803 non-null    object 
 6   Status               803 non-null    object 
 7   ReactorType          794 non-null    object 
 8   ReactorModel         713 non-null    object 
 9   ConstructionStartAt  722 non-null    object 
 10  OperationalFrom      636 non-null    object 
 11  OperationalTo        212 non-null    object 
 12  Capacity             724 non-null    float64
 13  LastUpdatedAt        733 non-null    object 
 14  Source               803 non-null    object 
 15  IAEAId               713 non-null    flo

In [17]:
# create a new dataframe for lattitude, longitude, name and country
df_1 = df[['Name', 'Country', 'Latitude', 'Longitude', 'Status', 'Capacity']]
df_1.head()

Unnamed: 0,Name,Country,Latitude,Longitude,Status,Capacity
0,Ågesta,Sweden,59.206,18.0829,Shutdown,9.0
1,Akademik Lomonosov-1,Russia,69.709579,170.30625,Operational,30.0
2,Akademik Lomonosov-2,Russia,69.709579,170.30625,Operational,30.0
3,Akhvaz-1,Iran,,,Planned,
4,Akhvaz-2,Iran,,,Planned,


In [18]:
# identify missing values
df_1.isnull().sum()

Name          0
Country       0
Latitude     44
Longitude    44
Status        0
Capacity     79
dtype: int64

In [19]:
# get current mean of capacity
df_1['Capacity'].mean()

778.4198895027624

In [20]:
# inpute missing values for capacity (will not effect mean as we are imputing the mean for the missing values)
df_1['Capacity'].fillna(df_1['Capacity'].mean(), inplace=True)
df_1.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['Capacity'].fillna(df_1['Capacity'].mean(), inplace=True)


Name          0
Country       0
Latitude     44
Longitude    44
Status        0
Capacity      0
dtype: int64

In [23]:
# create a list of only entries with missing values for latitude and longitude and save as a new dataframe
df_missing = df_1[df_1['Latitude'].isnull()]
df_missing.head()

Unnamed: 0,Name,Country,Latitude,Longitude,Status,Capacity
3,Akhvaz-1,Iran,,,Planned,778.41989
4,Akhvaz-2,Iran,,,Planned,778.41989
33,Chutka-1,India,,,Planned,778.41989
34,Chutka-2,India,,,Planned,778.41989
316,Hongshiding-1 (Rushan-1),China,,,Planned,778.41989


In [24]:
# create new dataframe excluding missing values
df_map = df_1.dropna(axis=0, how='any')
df_map.head()


Unnamed: 0,Name,Country,Latitude,Longitude,Status,Capacity
0,Ågesta,Sweden,59.206,18.0829,Shutdown,9.0
1,Akademik Lomonosov-1,Russia,69.709579,170.30625,Operational,30.0
2,Akademik Lomonosov-2,Russia,69.709579,170.30625,Operational,30.0
5,Akkuyu-1,Türkiye,36.144444,33.541111,Under Construction,1114.0
6,Akkuyu-2,Türkiye,36.144444,33.541111,Under Construction,1114.0


In [28]:
# get unique values for status
df_map['Status'].unique()

array(['Shutdown', 'Operational', 'Under Construction', 'Planned',
       'Cancelled Construction', 'Suspended Construction', 'Unknown',
       'Decommissioning Completed', 'Suspended Operation',
       'Never Commissioned'], dtype=object)

In [31]:
# plot map of nuclear power plants using folium
# Create a Folium map
m = folium.Map()

# Add markers to the map
for index, row in df_map.iterrows():
    name = row['Name']
    country = row['Country']
    latitude = row['Latitude']
    longitude = row['Longitude']
    status = row['Status']
    capacity = row['Capacity']

    marker_color = {
        'Operational': 'green',
        'Under Construction': 'yellow',
        'Planned': 'orange',
        'Cancelled Construction': 'red',
        'Suspended Construction': 'red',
        'Unknown': 'red',
        'Decommissioning Completed': 'red',
        'Suspended Operation': 'red',
        'Never Commissioned': 'red',
        'Shutdown': 'red'
    }[status]

    marker = folium.Marker([latitude, longitude], tooltip=f"{name} ({country}): {capacity} MWe")
    marker.add_to(m)


In [32]:
m