# Import Library

In [15]:
import numpy as np
import matplotlib.pyplot as plt
import csv

# Importing the Dataset

In [16]:
data = []

# read the file
with open("./raw_data.csv", "r") as csvfile:
    file_reader = csv.reader(csvfile)
        
    for row in file_reader:
        data.append(row)
        
# change data into numpy array
data = np.array(data)


In [17]:
data

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'],
       ['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '202209', '350919.559', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202210', '314048.288', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202211', '323198.745', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

In [18]:
data.shape

(8737, 6)

# Saving File

In [19]:
np.savetxt("energy.npy", data, fmt="%s", delimiter=",")
np.savetxt("energy.csv", data, fmt="%s", delimiter=",")

# Describe Data

In [20]:
header = data[0,:]
values = data[1:,:]

print("Header:\n",header,"\n\n Values\n",values)

Header:
 ['MSN' 'YYYYMM' 'Value' 'Column_Order' 'Description' 'Unit'] 

 Values
 [['CLETPUS' '194913' '135451.32' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ['CLETPUS' '195013' '154519.994' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ['CLETPUS' '195113' '185203.657' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ...
 ['ELETPUS' '202209' '350919.559' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']
 ['ELETPUS' '202210' '314048.288' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']
 ['ELETPUS' '202211' '323198.745' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']]


## Sources of Energy in US

In [21]:
(values[:,4])

array(['Electricity Net Generation From Coal, All Sectors',
       'Electricity Net Generation From Coal, All Sectors',
       'Electricity Net Generation From Coal, All Sectors', ...,
       'Electricity Net Generation Total (including from sources not shown), All Sectors',
       'Electricity Net Generation Total (including from sources not shown), All Sectors',
       'Electricity Net Generation Total (including from sources not shown), All Sectors'],
      dtype='<U80')

In [22]:
(values[1:,:])

array([['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195113', '185203.657', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195213', '195436.666', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '202209', '350919.559', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202210', '314048.288', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202211', '323198.745', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

In [23]:
num_of_sources = len(np.unique(values[:,4]))

print("The Usa generates Electricity from {} sources".format(num_of_sources))
print(np.unique(values[:,4]))

The Usa generates Electricity from 13 sources
['Electricity Net Generation From Coal, All Sectors'
 'Electricity Net Generation From Conventional Hydroelectric Power, All Sectors'
 'Electricity Net Generation From Geothermal, All Sectors'
 'Electricity Net Generation From Hydroelectric Pumped Storage, All Sectors'
 'Electricity Net Generation From Natural Gas, All Sectors'
 'Electricity Net Generation From Nuclear Electric Power, All Sectors'
 'Electricity Net Generation From Other Gases, All Sectors'
 'Electricity Net Generation From Petroleum, All Sectors'
 'Electricity Net Generation From Solar, All Sectors'
 'Electricity Net Generation From Waste, All Sectors'
 'Electricity Net Generation From Wind, All Sectors'
 'Electricity Net Generation From Wood, All Sectors'
 'Electricity Net Generation Total (including from sources not shown), All Sectors']


# Extract the wind energy data

In [24]:
wind_cond = values[:,4] ==  "Electricity Net Generation From Wind, All Sectors"


In [25]:
wind_data = values[wind_cond]

In [26]:
wind_data.shape

(672, 6)

In [27]:
Petroleum_cond = values[:,4] == 'Electricity Net Generation From Petroleum, All Sectors'

In [28]:
Petroleum_data = values[Petroleum_cond]

In [29]:
Petroleum_data.shape

(672, 6)

In [30]:
Coal_cond = values[:,4] == 'Electricity Net Generation From Coal, All Sectors'

In [31]:
Coal_data = values[Coal_cond]

In [32]:
Coal_data.shape

(672, 6)

In [96]:
Conventional_cond = values[:,4] == 'Electricity Net Generation From Conventional Hydroelectric Power, All Sectors'

In [97]:
ConventionalHydroelectricPower_data = values[Conventional_cond]

In [100]:
ConventionalHydroelectricPower_data.shape

(672, 6)

In [42]:
Hydroelectric_Pumped_Storage_cond = values[:,4] == 'Electricity Net Generation From Hydroelectric Pumped Storage, All Sectors'

In [43]:
Hydroelectric_Pumped_Storage_data = values[Hydroelectric_Pumped_Storage_cond]

In [44]:
Hydroelectric_Pumped_Storage_data.shape

(672, 6)

In [45]:
Geothermal_cond = values[:,4] ==  'Electricity Net Generation From Geothermal, All Sectors'

In [46]:
Geothermal_data = values[Geothermal_cond]

In [47]:
Geothermal_data.shape

(672, 6)

In [48]:
Natural_Gas_cond = values[:,4] == 'Electricity Net Generation From Natural Gas, All Sectors'

In [49]:
Natural_Gas_data = values[Natural_Gas_cond]

In [50]:
Natural_Gas_data.shape

(672, 6)

In [51]:
Nuclear_Electric_Power_cond = values[:,4] == 'Electricity Net Generation From Nuclear Electric Power, All Sectors'

In [52]:
Nuclear_Electric_Power_data = values[Nuclear_Electric_Power_cond]

In [53]:
Nuclear_Electric_Power_data.shape

(672, 6)

In [54]:
Other_Gases_cond = values[:,4] == 'Electricity Net Generation From Other Gases, All Sectors'

In [55]:
Other_Gases_data = values[Other_Gases_cond]

In [56]:
Other_Gases_data.shape

(672, 6)

In [57]:
Solar_cond = values[:,4] == 'Electricity Net Generation From Solar, All Sectors'

In [58]:
Solar_data = values[Solar_cond]

In [59]:
Solar_data.shape

(672, 6)

In [60]:
Waste_cond = values[:,4] == 'Electricity Net Generation From Waste, All Sectors'

In [61]:
Waste_data = values[Waste_cond]

In [62]:
Waste_data.shape

(672, 6)

In [63]:
Wood_cond = values[:,4] == 'Electricity Net Generation From Wood, All Sectors'

In [64]:
Wood_data = values[Wood_cond]

In [65]:
Wood_data.shape

(672, 6)

In [66]:
Unknown_Sources_cond = values[:,4] == 'Electricity Net Generation Total (including from sources not shown), All Sectors'

In [67]:
Unknown_Sources_data = values[Unknown_Sources_cond]

In [68]:
Unknown_Sources_data.shape

(672, 6)

# Save into File

In [69]:
np.savetxt("wind.csv", wind_data, fmt="%s", delimiter=",")

In [70]:
np.savetxt("Petroleum.csv", Petroleum_data, fmt="%s", delimiter=",")

In [71]:
np.savetxt("Coal.csv", Coal_data, fmt="%s", delimiter=",")

In [102]:
np.savetxt("Conventional_Hydroelectric_Power_data.csv", ConventionalHydroelectricPower_data, fmt="%s", delimiter=",")

In [73]:
np.savetxt("Hydroelectric_Pumped_Storage.csv", Hydroelectric_Pumped_Storage_data, fmt="%s", delimiter=",")

In [74]:
np.savetxt("Geothermal.csv", Geothermal_data, fmt="%s", delimiter=",")

In [75]:
np.savetxt("Natural_Gas.csv", Natural_Gas_data, fmt="%s", delimiter=",")

In [76]:
np.savetxt("Nuclear_Electric_Power.csv", Nuclear_Electric_Power_data, fmt="%s", delimiter=",")

In [77]:
np.savetxt("Other_Gases.csv", Other_Gases_data, fmt="%s", delimiter=",")

In [78]:
np.savetxt("Solar.csv", Solar_data, fmt="%s", delimiter=",")

In [79]:
np.savetxt("Waste.csv", Waste_data, fmt="%s", delimiter=",")

In [80]:
np.savetxt("Wood.csv", Wood_data, fmt="%s", delimiter=",")

In [81]:
np.savetxt("Unknown_Sources.csv", Unknown_Sources_data, fmt="%s", delimiter=",")

# Importing the dataset

In [103]:
energy = []

# read the file
with open("./raw_data.csv", "r") as csvfile:
    file_reader = csv.reader(csvfile)
        
    for row in file_reader:
       energy.append(row)
        
# change data into numpy array
energy = np.array(energy)

In [104]:
energy

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'],
       ['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '202209', '350919.559', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202210', '314048.288', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '202211', '323198.745', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

# Saving File

In [105]:
np.savetxt("energy.csv", energy, fmt="%s", delimiter=",")


# Describe Data

In [119]:
header = energy[0,:]
data = energy[1:,:]

print("Header:\n",header,"\n\n Data\n",data)

Header:
 ['MSN' 'YYYYMM' 'Value' 'Column_Order' 'Description' 'Unit'] 

 Data
 [['CLETPUS' '194913' '135451.32' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ['CLETPUS' '195013' '154519.994' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ['CLETPUS' '195113' '185203.657' '1'
  'Electricity Net Generation From Coal, All Sectors'
  'Million Kilowatthours']
 ...
 ['ELETPUS' '202209' '350919.559' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']
 ['ELETPUS' '202210' '314048.288' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']
 ['ELETPUS' '202211' '323198.745' '13'
  'Electricity Net Generation Total (including from sources not shown), All Sectors'
  'Million Kilowatthours']]


In [120]:
annual_mask = np.array([x[-2:] == '13' for x in wind_data[:,1]] )

In [121]:
annual_wind_data = wind_data[annual_mask]

In [122]:
annual_wind_data

array([['WYETPUS', '194913', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195013', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195113', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195213', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195313', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195413', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195513', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
     

In [129]:
wind_data[:,2]

array(['Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', 'Not Available', 'Not Available', 'Not Available',
       'Not Available', '

In [131]:
available_mask = wind_data[:,2] != "not Available"
    
wind_data = wind_data[available_mask]

In [133]:
wind_data

array([['WYETPUS', '194913', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195013', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195113', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['WYETPUS', '202209', '27024.307', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '202210', '32824.906', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '202211', '41971.407', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours']], dtype='<U80')

# Analysis

In [134]:
np.array([x[-2:] == '13' for x in wind_data[:,1]] )

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True,

In [140]:
available_mask = wind_data[:,2] != "not Available"
    
wind_data = wind_data[available_mask]

In [138]:
wind_data

array([['WYETPUS', '194913', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195013', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '195113', 'Not Available', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['WYETPUS', '202209', '27024.307', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '202210', '32824.906', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '202211', '41971.407', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours']], dtype='<U80')

# 1. Total Energy Generated

In [141]:
all_energy = annual_wind_data[:,2].astype(float)

ValueError: could not convert string to float: 'Not Available'

In [136]:
print("The total wind energy generated in USA is :{} killowatt per Hours".format(all_energy.sum()))

NameError: name 'all_energy' is not defined

# 2. The Mean  Energy Generated

In [86]:
all_energy.mean()

NameError: name 'all_energy' is not defined

# The Standard Deviation of Energy Generated

In [9]:
all_energy.std()

NameError: name 'all_energy' is not defined

In [14]:
x = wind_data[:,1]
y = wind_data[:,2] # year

NameError: name 'wind_data' is not defined

In [13]:
plt.plot(x,y)
plt.xlabel("Energy Generated")
plt.ylabel("Date")
plt.show()

NameError: name 'x' is not defined