### Extract Total Solar Irradiance (TSI) from JSON

This script gets data from the SORCE project of NASA for Total Solar Irradiance ("http://lasp.colorado.edu/lisird/latis/nrl2_tsi_P1M.json?time%3E=1970-01-01T12:00&format_time(yyyy-DD-MM))

Data is request only after 1970 for each month. 

The TSI is represented in watts/ sq. meter

In [1]:
#Import Dependencies
import json
import requests
import pandas as pd
from pandas.io.json import json_normalize

from pprint import pprint


In [2]:
# URL for extracting data
url = "http://lasp.colorado.edu/lisird/latis/nrl2_tsi_P1M.json?time%3E=1970-01-01T12:00&format_time(yyyy-DD-MM)"

In [21]:
# Extract JSON though requests.get()
try:
    resp = requests.get(url)
    
    #check if the status code is other 200 (ie. not successful request)
    if(resp.status_code != 200):
        raise HTTPError
    
    # extract the JSON data
    TSI_json = resp.json()
    
except ConnectionError as c:
    print("Error in Connection :" + e)

except HTTPError as h:
    print("Unsuccessful in obtaining JSON : " + h)
    

In [22]:
## Print JSON Outout for verification
pprint(TSI_json)

{'nrl2_tsi_P1M': {'samples': [{'irradiance': 1361.3564453125,
                               'time': '1970-15-01',
                               'uncertainty': 0.5603513121604919},
                              {'irradiance': 1361.090576171875,
                               'time': '1970-46-02',
                               'uncertainty': 0.6423571705818176},
                              {'irradiance': 1361.6412353515625,
                               'time': '1970-74-03',
                               'uncertainty': 0.5571277141571045},
                              {'irradiance': 1361.3487548828125,
                               'time': '1970-105-04',
                               'uncertainty': 0.610397219657898},
                              {'irradiance': 1361.2384033203125,
                               'time': '1970-135-05',
                               'uncertainty': 0.6647059917449951},
                              {'irradiance': 1361.541015625,
                 

                              {'irradiance': 1360.6123046875,
                               'time': '1985-135-05',
                               'uncertainty': 0.10417693108320236},
                              {'irradiance': 1360.6754150390625,
                               'time': '1985-166-06',
                               'uncertainty': 0.08748660236597061},
                              {'irradiance': 1360.633544921875,
                               'time': '1985-196-07',
                               'uncertainty': 0.10857190191745758},
                              {'irradiance': 1360.765869140625,
                               'time': '1985-227-08',
                               'uncertainty': 0.0635213553905487},
                              {'irradiance': 1360.6533203125,
                               'time': '1985-258-09',
                               'uncertainty': 0.033385731279850006},
                              {'irradiance': 1360.544921875,
            

                               'uncertainty': 0.05944843217730522},
                              {'irradiance': 1360.7457275390625,
                               'time': '1997-105-04',
                               'uncertainty': 0.06575540453195572},
                              {'irradiance': 1360.7794189453125,
                               'time': '1997-135-05',
                               'uncertainty': 0.08336005359888077},
                              {'irradiance': 1360.7667236328125,
                               'time': '1997-166-06',
                               'uncertainty': 0.06938306987285614},
                              {'irradiance': 1360.7947998046875,
                               'time': '1997-196-07',
                               'uncertainty': 0.06440741568803787},
                              {'irradiance': 1360.8487548828125,
                               'time': '1997-227-08',
                               'uncertainty': 0.10447163879871368

                               'uncertainty': 0.014647068455815315},
                              {'irradiance': 1360.591796875,
                               'time': '2009-74-03',
                               'uncertainty': 0.01683410257101059},
                              {'irradiance': 1360.5816650390625,
                               'time': '2009-105-04',
                               'uncertainty': 0.013995587825775146},
                              {'irradiance': 1360.639892578125,
                               'time': '2009-135-05',
                               'uncertainty': 0.02759220451116562},
                              {'irradiance': 1360.618896484375,
                               'time': '2009-166-06',
                               'uncertainty': 0.02760385349392891},
                              {'irradiance': 1360.5867919921875,
                               'time': '2009-196-07',
                               'uncertainty': 0.0266764797270298},
   

In [23]:
# Extract JSON data into dataframe using normalize function
TSI_df = json_normalize(TSI_json['nrl2_tsi_P1M']['samples'])

TSI_df.head()
                    

Unnamed: 0,irradiance,time,uncertainty
0,1361.356445,1970-15-01,0.560351
1,1361.090576,1970-46-02,0.642357
2,1361.641235,1970-74-03,0.557128
3,1361.348755,1970-105-04,0.610397
4,1361.238403,1970-135-05,0.664706


In [24]:
#Write the raw data into data folder
TSI_df.to_csv("..\data\TSI_rawdata_fromJSON.csv", index = False)

## Clean the data
 - Step 1 : Extract Year and Month from time column
 - Step 2 : reshape data so that Month is displayed across columns 
 - Step 3: Check for missing values
 - Step 4: If values are missing, replace them with mean for the year
 

In [25]:
# Step 1 : Extract Year and Month from time column
split_yr_mnth = TSI_df['time'].str.split("-")

TSI_df['Year'] = [v[0] for v in split_yr_mnth]
TSI_df['Month'] = [v[2] for v in split_yr_mnth]

TSI_df.head()

Unnamed: 0,irradiance,time,uncertainty,Year,Month
0,1361.356445,1970-15-01,0.560351,1970,1
1,1361.090576,1970-46-02,0.642357,1970,2
2,1361.641235,1970-74-03,0.557128,1970,3
3,1361.348755,1970-105-04,0.610397,1970,4
4,1361.238403,1970-135-05,0.664706,1970,5


In [38]:
#Step 2: reshape data so that Month is displayed across columns 

TSI_df_fnl = TSI_df.pivot_table(index = ['Year'],columns = 'Month', values = 'irradiance')

TSI_df_fnl.reset_index(inplace = True)

# Rename month number to names
mnth_names = {'01':'Jan','02':'Feb','03':'Mar','04':'Apr','05':'May','06':'Jun',\
              '07':'Jul','08':'Aug','09':'Sep','10':'Oct','11':'Nov', '12':'Dec'}

TSI_df_fnl.rename(columns = mnth_names, inplace = True)

TSI_df_fnl.rename_axis("", axis = 1, inplace = True)

TSI_df_fnl.head()

Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,1970,1361.356445,1361.090576,1361.641235,1361.348755,1361.238403,1361.541016,1361.328857,1361.303101,1361.335693,1361.390015,1361.366943,1361.5448
1,1971,1361.068115,1361.079346,1361.334229,1361.090576,1361.068359,1361.143066,1360.915405,1360.887695,1361.090698,1360.843506,1360.992798,1360.974854
2,1972,1361.261719,1361.02124,1361.064453,1361.411987,1360.911255,1361.12915,1361.247925,1361.1073,1361.278687,1360.903809,1361.025879,1360.8479
3,1973,1360.994019,1360.989502,1360.784302,1360.887573,1360.996704,1360.930176,1360.851318,1360.848389,1360.680054,1360.842407,1360.782349,1360.796875
4,1974,1360.768555,1360.731689,1360.84668,1360.693481,1360.897705,1360.935059,1360.620239,1360.803101,1360.700195,1360.696045,1360.851929,1360.705933


In [44]:
# Step 3: Check for missing values
#Step 4: Replace missing values with row wise averages for months
if(TSI_df_fnl.isnull().values.sum() != 0):
    print("has missing values")
    
    #check to see if it is in the year
    if(TSI_df_fnl['Year'].isnull().values.sum() != 0):
        print('{TSI_df_fnl["Year"].isnull().values.sum()} Year(s) are missing')
    
    if(TSI_df_fnl['Month'].isnull().values.sum() != 0):
        rowAvg = TSI_df_fnl[['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']].mean(axis = 1)
        # Replace NAN with row-wise mean
        TSI_df_fnl = TSI_df_fnl.apply(lambda r: r.fillna(rowAvg[r.index]))
        
else:
    print("Data has no missing values")

TSI_df_fnl = TSI_df_fnl.round(decimals = 2)
TSI_df_fnl.head()

Data has no missing values


Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,1970,1361.36,1361.09,1361.64,1361.35,1361.24,1361.54,1361.33,1361.3,1361.34,1361.39,1361.37,1361.54
1,1971,1361.07,1361.08,1361.33,1361.09,1361.07,1361.14,1360.92,1360.89,1361.09,1360.84,1360.99,1360.97
2,1972,1361.26,1361.02,1361.06,1361.41,1360.91,1361.13,1361.25,1361.11,1361.28,1360.9,1361.03,1360.85
3,1973,1360.99,1360.99,1360.78,1360.89,1361.0,1360.93,1360.85,1360.85,1360.68,1360.84,1360.78,1360.8
4,1974,1360.77,1360.73,1360.85,1360.69,1360.9,1360.94,1360.62,1360.8,1360.7,1360.7,1360.85,1360.71


In [46]:
# Write to CSV 
TSI_df_fnl.to_csv("..\data\TSI_MonthlyAvg.csv", index = False)