In [49]:
import requests
import json
import numpy as np
import pandas as pd
from copy import copy
import sys
sys.path.append('/home/ndsviriden/MinMax94/src/utils') 
from Preprocessing import Preprocessor
from constants import RUSSIAN_TIME_ZONES, rp5_columns, map_wind_dir_rp5_to_mmx, map_cloudiness_rp5_to_mmx, \
                    map_precip_code_rp5_to_mmx, map_precip_count_rp5_to_mmx, map_visibility_rp5_to_mmx, \
                    MmxColumns, RP5Columns, mmx_meteo_columns
from Preprocessing import vincenty_dist

# Root directory of the CSV data: the cells below read the station-definition
# files and the 'RP5/' sub-folder from it.
# NOTE: `mypath` is rebound to a different directory in the "RWIS data" section.
mypath = '/mnt/HARD/MinMax94/data/CSV'

## Convert RP5 to Mmx

* Loading dataframes with information about stations

In [50]:
# Station metadata tables, one per data source. The next cell reads the
# 'station_id', 'latitude' and 'longitude' columns from them.
stations_mm94 = pd.read_csv(mypath+'/stations_mm94_def.csv')
stations_rp5 = pd.read_csv(mypath+'/stations_rp5_def.csv')

* Choosing one mm94 station and finding the closest WMO station

In [51]:
## find nearest wmo station
mm94_station_id = 1821
station_data = stations_mm94[stations_mm94['station_id'] == mm94_station_id]
if station_data.empty:
    # Fail with a readable message instead of the IndexError that `.values[0]`
    # would raise on an empty selection.
    raise ValueError('station_id {} not found in stations_mm94'.format(mm94_station_id))

# (latitude, longitude) of the first matching row
mm94_coords = station_data[['latitude', 'longitude']].values[0]

# Distance from the chosen mm94 station to every row of stations_rp5
# (vincenty_dist is imported from Preprocessing; one value per station).
rp5_distances = stations_rp5.apply(
    lambda x: vincenty_dist(mm94_coords, (x['latitude'], x['longitude'])),
    axis=1)

# idxmin() gives the index label of the smallest distance; read the id of that row directly.
nearest_rp5_station_id = stations_rp5.loc[rp5_distances.idxmin(), 'station_id']

* Loading data from the nearest wmo station

In [52]:
# `wmo_station_id` is used throughout this cell but is not assigned by any earlier
# cell; bind it to the station found by the nearest-station search so the cell
# runs on a fresh kernel.
wmo_station_id = nearest_rp5_station_id

# RP5 export: ';'-separated, the first 6 lines are skipped, 'VV' and 'RRR' are kept as text.
rp5 = pd.read_csv(mypath + '/RP5/' + str(wmo_station_id) + '.csv', sep=';', skiprows=6, index_col=False,
                  dtype={'VV': str, 'RRR': str})

# The header that starts with 'Местное время' may carry extra text after the
# prefix; rename it to the bare prefix so it matches the expected column name.
date_time_col = [col for col in rp5.columns if col.startswith('Местное время')][0]
rp5 = rp5.rename(columns={date_time_col: 'Местное время'})
rp5['station_id'] = wmo_station_id

# columns which are absent for the exact wmo station
absent_columns = [col for col in rp5_columns if col not in rp5.columns]

# leave only columns which are needed (from constants); reindex also creates the
# absent ones filled with NaN, so no separate assignment step is required
rp5 = rp5.reindex(columns=rp5_columns)
rp5.head()

Unnamed: 0,DD,Td,WW,tR,W1,N,U,P,T,Ff,Местное время,ff10,RRR,station_id,VV
0,"Ветер, дующий с юго-юго-востока",2.3,,,,Облаков нет.,64.0,763.1,8.8,1.0,01.01.2017 23:00,,,38987,4.0
1,"Штиль, безветрие",2.1,,,,"90 или более, но не 100%",55.0,762.0,10.8,0.0,01.01.2017 20:00,,,38987,4.0
2,"Штиль, безветрие",2.5,,,,"90 или более, но не 100%",44.0,761.8,14.8,0.0,01.01.2017 17:00,,,38987,4.0
3,"Ветер, дующий с западо-юго-запада",1.1,,,,40%.,46.0,763.2,12.4,1.0,01.01.2017 14:00,,,38987,4.0
4,"Ветер, дующий с запада",0.9,,,,20–30%.,68.0,767.2,6.5,2.0,01.01.2017 11:00,,,38987,4.0


* Converting from RP5 to Mmx format

In [121]:
def add_utc(df_raw, station_def_path='/mnt/HARD/MinMax94/data/data_all/CSV/stations_mm94_def.csv'):
    """Return a copy of ``df_raw`` with a UTC timestamp column.

    For each row, the ``timezone`` value of the row's station (read from the
    station-definition CSV and used as an offset in hours) is subtracted from
    ``MmxColumns.DATE_TIME_LOCAL``; the result is stored in
    ``MmxColumns.DATE_TIME_UTC``.

    The lookup is vectorised instead of mutating groups inside
    ``groupby(...).apply``, so the returned frame keeps the index and columns
    of ``df_raw`` regardless of the pandas version's ``apply`` semantics.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain ``MmxColumns.STATION_ID`` and ``MmxColumns.DATE_TIME_LOCAL``.
        It is not modified.
    station_def_path : str
        CSV file with a ``MmxColumns.STATION_ID`` column and a ``timezone`` column.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df_raw`` (original order and index) plus the UTC column.
        Rows whose station id is missing are left out, as the group-by based
        implementation did.

    Raises
    ------
    IndexError
        If a station id present in ``df_raw`` has no row in the definition file.
    """
    station_def = pd.read_csv(station_def_path)

    station_ids = df_raw[MmxColumns.STATION_ID]
    present_ids = station_ids.dropna().unique()

    # Only stations that actually occur in the data are looked up; when a station
    # is listed more than once, the first row wins.
    known = station_def.drop_duplicates(subset=MmxColumns.STATION_ID)
    known = known[known[MmxColumns.STATION_ID].isin(present_ids)]
    offset_by_station = {
        station_id: pd.Timedelta(str(timezone) + 'h')
        for station_id, timezone in zip(known[MmxColumns.STATION_ID], known['timezone'])
    }

    missing = [station_id for station_id in present_ids if station_id not in offset_by_station]
    if missing:
        raise IndexError('no timezone in {} for station id(s): {}'.format(station_def_path, missing))

    df_with_utc = df_raw[station_ids.notna()].copy()
    # to_timedelta keeps the dtype well-defined even when the frame has no rows
    offsets = pd.to_timedelta(df_with_utc[MmxColumns.STATION_ID].map(offset_by_station))
    df_with_utc[MmxColumns.DATE_TIME_UTC] = pd.to_datetime(df_with_utc[MmxColumns.DATE_TIME_LOCAL] - offsets)
    return df_with_utc

def rp5_datetime_to_mmx_format(datetime_rp5):
    """Rewrite an RP5 timestamp string in 'YYYY-MM-DD HH:MM:00' form.

    The input is expected as '<date> <time>' with exactly one space, where the
    date fields are separated by '.' (e.g. '01.01.2017 23:00'). The date fields
    are reversed and joined with '-', and ':00' is appended to the time part.
    """
    date_part, time_part = datetime_rp5.split(' ')
    date_fields = date_part.split('.')
    # reverse the field order: day.month.year -> year-month-day
    return '-'.join(date_fields[::-1]) + ' ' + time_part + ':00'

def convert_rp5_to_mmx(df_rp5):
    """Build an Mmx-format frame from an RP5 frame.

    A new frame with the index of ``df_rp5`` and the columns listed in
    ``mmx_meteo_columns`` is filled column by column: some columns are recoded
    through the ``map_*_rp5_to_mmx`` dictionaries, the rest are copied as is.
    Finally ``add_utc`` is applied using the RP5 station-definition file.

    Parameters
    ----------
    df_rp5 : pandas.DataFrame
        Frame holding the columns named by ``RP5Columns``.

    Returns
    -------
    pandas.DataFrame
        The result of ``add_utc`` on the converted frame.
    """
    source = copy(df_rp5)
    converted = pd.DataFrame(index=source.index, columns=mmx_meteo_columns)

    # station_id column
    converted[MmxColumns.STATION_ID] = source[RP5Columns.STATION_ID]

    # date_time column: reformat the RP5 text first, then parse it
    local_time_text = source[RP5Columns.DATE_TIME_LOCAL].apply(rp5_datetime_to_mmx_format)
    converted[MmxColumns.DATE_TIME_LOCAL] = pd.to_datetime(local_time_text)

    # wind_direction
    wind_direction = source[RP5Columns.WIND_DIRECTION].replace(map_wind_dir_rp5_to_mmx)
    converted[MmxColumns.WIND_DIRECTION] = wind_direction

    # precipitation code
    precip_code = source[RP5Columns.PRECIPITATION_CODE].replace(map_precip_code_rp5_to_mmx)
    converted[MmxColumns.PRECIPITATION_CODE] = pd.to_numeric(precip_code)

    # precipitation intensity: recoded amount divided by the precipitation interval column
    precip_amount = source[RP5Columns.PRECIPITATION_INTENSITY].replace(map_precip_count_rp5_to_mmx)
    precip_interval = source[RP5Columns.PRECIPITATION_INTERVAL]
    converted[MmxColumns.PRECIPITATION_INTENSITY] = pd.to_numeric(precip_amount) / precip_interval

    # visibility: recoded value scaled by 1000 (presumably km -> m; confirm against the mapping)
    visibility = source[RP5Columns.VISIBILITY].replace(map_visibility_rp5_to_mmx)
    converted[MmxColumns.VISIBILITY] = 1000 * pd.to_numeric(visibility)

    # cloudiness
    cloudiness = source[RP5Columns.CLOUDINESS].replace(map_cloudiness_rp5_to_mmx)
    converted[MmxColumns.CLOUDINESS] = pd.to_numeric(cloudiness)

    # Columns copied without conversion, as (Mmx target, RP5 source) pairs in the
    # order they are assigned.
    # NOTE(review): WIND_MAX_SPEED is filled from the same RP5 wind-speed column
    # as WIND_SPEED; confirm this is intended.
    copied_as_is = [
        (MmxColumns.AIR_TEMPERATURE, RP5Columns.AIR_TEMPERATURE),              # t_air
        (MmxColumns.HUMIDITY, RP5Columns.HUMIDITY),                            # humidity
        (MmxColumns.WIND_SPEED, RP5Columns.WIND_SPEED),                        # wind_speed
        (MmxColumns.WIND_MAX_SPEED, RP5Columns.WIND_SPEED),                    # maximum wind speed
        (MmxColumns.DEW_POINT_TEMPERATURE, RP5Columns.DEW_POINT_TEMPERATURE),  # dew point temperature
        (MmxColumns.PRESSURE, RP5Columns.PRESSURE),                            # pressure
        (MmxColumns.P_WEATHER, RP5Columns.P_WEATHER),                          # p_weather
    ]
    for mmx_column, rp5_column in copied_as_is:
        converted[mmx_column] = source[rp5_column]

    # adding utc_time
    return add_utc(converted, '/mnt/HARD/MinMax94/data/CSV/stations_rp5_def.csv')

## RWIS data

In [10]:
# Directory with the per-station raw CSV files.
# NOTE: this rebinds `mypath`, which the first cell set to the CSV root directory.
mypath = '/mnt/HARD/MinMax94/data/data_all/CSV/Raw_extended/'

# read the raw csv of the chosen mm94 station ('<mm94_station_id>_raw.csv') into a
# single dataframe, parsing the 'date_time' column as datetimes
raw = pd.read_csv(mypath + str(mm94_station_id) + '_raw.csv', parse_dates = ['date_time']) 
raw = raw.reset_index(drop=True)

# NOTE(review): `preprocessor` is not assigned in any visible cell (only the
# `Preprocessor` class is imported) — presumably an instance created elsewhere;
# confirm before relying on Restart & Run All.
# Processing chain, in order: SelectFeatures -> PivotTable -> ConvertData (Raw to Mmx)
# -> AddUTC -> CreatePatternList (max_gap of 4 hours) -> InterpolatePatterns.
raw_data = preprocessor.SelectFeatures(raw)
raw_data = preprocessor.PivotTable(raw_data)
mmx_data = preprocessor.ConvertData(raw_data, from_format="Raw", to_format="Mmx")
mmx_data = preprocessor.AddUTC(mmx_data)
mmx_patterns = preprocessor.CreatePatternList(mmx_data, max_gap = pd.Timedelta('4h'))
mmx_interpolated = preprocessor.InterpolatePatterns(mmx_patterns)

# Convert to the "Metro" format and index by 'date_time_utc'; 'salinity' and
# 'freezing_point' are set to the constant 0, and the 'cloudiness' and
# 'station_id' columns are removed.
rwis_df = preprocessor.ConvertData(mmx_interpolated, from_format="Mmx", to_format="Metro")
rwis_df = rwis_df.set_index('date_time_utc')
rwis_df['salinity'] = 0
rwis_df['freezing_point'] = 0
del rwis_df['cloudiness'], rwis_df['station_id']

In [11]:
# not used anywhere else in the visible cells
rwis_previous_delta = pd.Timedelta(12, unit='h')

# keep the rows whose index falls between 07:00 and 13:00 on 2015-12-30, both ends included
window_start = pd.Timestamp(2015, 12, 30, 7, 0)
window_end = pd.Timestamp(2015, 12, 30, 13, 0)
in_window = (rwis_df.index >= window_start) & (rwis_df.index <= window_end)
rwis_data = rwis_df[in_window].set_index('date_time_metro', drop=True)

# nested dict: one entry per 'date_time_metro' value, each mapping column name -> value
rwis_data_json = rwis_data.to_dict(orient='index')

## All together

In [12]:
import simplejson
# `test_path`: file the payload built here is written to and re-read from in a later cell.
# `working_path`: existing JSON file that a later cell loads into `working_data`.
test_path = '/home/ndsviriden/metro_test.txt'
working_path = '/home/ndsviriden/data_metro_1.txt'

# Payload with four sections.
# NOTE(review): `station_config`, `road_config` and `global_forecast_json` are not
# assigned in any visible cell — confirm where they are defined.
data_for_metro = {"station_config": station_config, 
                  "road_config": road_config,
                  "global_forecast": global_forecast_json,
                  "rwis_data": rwis_data_json}

In [21]:
# Inspect the Python type of a single forecast value (the recorded output is `float`).
type(data_for_metro["global_forecast"]['2015-12-30 10:00 UTC']['cloudiness'])

float

In [16]:
# Display the whole "global_forecast" section: a dict keyed by timestamp strings,
# each holding a dict of weather fields (see the recorded output).
data_for_metro["global_forecast"]

{'2015-12-30 10:00 UTC': {'cloudiness': 50.0,
  'humidity': 34.0,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 762.8,
  't_air': 16.8,
  't_dew_point': 0.8,
  'visibility': 4000.0,
  'wind_direction': 247.5,
  'wind_speed': 1.7},
 '2015-12-30 11:00 UTC': {'cloudiness': 60.0,
  'humidity': 35.0,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 761.9,
  't_air': 17.1,
  't_dew_point': 1.5,
  'visibility': 4000.0,
  'wind_direction': 270.0,
  'wind_speed': 1.3},
 '2015-12-30 13:00 UTC': {'cloudiness': 80.0,
  'humidity': 42.7,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 761.5,
  't_air': 15.0,
  't_dew_point': 2.0,
  'visibility': 4000.0,
  'wind_direction': 210.0,
  'wind_speed': 1.0},
 '2015-12-30 14:00 UTC': {'cloudiness': 80.0,
  'humidity': 49.3,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pr

In [13]:
# Load the JSON stored at `working_path`; a later cell reads its
# 'global_forecast' keys next to those of `data_for_metro`.
with open(working_path, "r") as file:
    working_data = json.load(file)

In [14]:
# Round-trip the payload through a file: simplejson writes it with 4-space
# indentation and ignore_nan=True (NaN values are emitted as null) ...
with open(test_path, "w") as file:
    simplejson.dump(data_for_metro, file, indent=4, ignore_nan=True)

# ... and `data_for_metro` is then rebound to what the standard json module
# reads back from that file.
with open(test_path, "r") as file:
    data_for_metro = json.load(file)

In [154]:
# Field names of the '2015-12-30 10:00 UTC' forecast entry in the re-read payload (k1)
# and in the payload loaded from `working_path` (k2); nothing is displayed or compared here.
k1 = data_for_metro['global_forecast']['2015-12-30 10:00 UTC'].keys()
k2 = working_data['global_forecast']['2015-12-30 10:00 UTC'].keys()

In [32]:
# Scratch check: dump a small nested dict (a list of dicts under "value") to its
# own file with the same simplejson options used for the payload.
new = '/home/ndsviriden/just_six.txt'
r = {"value": [{"1": 1, "2": 2}, {"3": 3}]}

with open(new, "w") as file:
    simplejson.dump(r, file, indent=4, ignore_nan=True)

In [159]:
# Display the forecast entry for a single timestamp.
data_for_metro['global_forecast']['2015-12-30 10:00 UTC']

{'cloudiness': 51.7,
 'humidity': 34.0,
 'p_weather': 0.0,
 'precipitation_intensity': 0.0,
 'precipitation_type': 0.0,
 'pressure': 762.8,
 't_air': 16.8,
 't_dew_point': 0.8,
 'visibility': 4000.0,
 'wind_direction': 247.5,
 'wind_speed': 1.7}