In [1]:
import json
import numpy as np
import pandas as pd
from copy import copy
import sys
sys.path.append('/home/ndsviriden/MinMax94/src/utils') 
from Preprocessing import Preprocessor
from constants import RUSSIAN_TIME_ZONES, rp5_columns
from converters import convert_rp5_to_mmx, convert_mmx_to_mmcc_forecast, convert_raw_to_mmx
from loaders import load_rp5_stations, load_mm94_stations
from Preprocessing import vincenty_dist

mypath = '/mnt/HARD/MinMax94/data/CSV'

## Convert RP5 to Mmx

* Load dataframes with information about stations

In [2]:
stations_mm94 = pd.read_csv(mypath+'/stations_mm94_def.csv')
stations_rp5 = pd.read_csv(mypath+'/stations_rp5_def.csv')

* Choose one mm94 station and finding the closeat wmo station

In [3]:
## find nearest wmo station
mm94_station_id = 1821
station_data = stations_mm94[stations_mm94['station_id']==mm94_station_id]
mm94_coords = station_data[['latitude', 'longitude']].values[0]

wmo_station_id = stations_rp5.loc[[stations_rp5.apply(
                                lambda x: vincenty_dist(mm94_coords, (x['latitude'], x['longitude'])), 
                                axis=1).idxmin()]]['station_id'].values[0]

* Load data from the nearest wmo station

In [4]:
rp5 = load_rp5_stations(wmo_station_id)

* Convert from RP5 to Mmx format

In [5]:
mmx = convert_rp5_to_mmx(rp5)
mmx.head()

Unnamed: 0,data_dew_point,date_time,data_pressure,station_id,data_wind_velocity,data_t_air,data_precip_code,data_precip_count,data_wind_dir,date_time_utc,data_cloudiness,data_p_weather,data_freezing_point,data_dampness,data_visibility,data_wind_speedmax
14927,-5.4,2012-01-01 02:00:00,769.1,38987,1.0,-3.3,0,,22.5,2011-12-31 21:00:00,0.0,0,,85.0,4000.0,
14926,-6.8,2012-01-01 05:00:00,768.8,38987,0.0,-4.2,0,,0.0,2012-01-01 00:00:00,0.0,0,,82.0,4000.0,
14925,-5.6,2012-01-01 08:00:00,767.7,38987,1.0,-3.5,0,,67.5,2012-01-01 03:00:00,0.0,0,,85.0,4000.0,
14924,-4.8,2012-01-01 11:00:00,766.5,38987,1.0,3.1,0,,90.0,2012-01-01 06:00:00,0.0,0,,56.0,4000.0,
14923,-11.7,2012-01-01 14:00:00,761.8,38987,3.0,15.8,0,,90.0,2012-01-01 09:00:00,75.0,0,,14.0,4000.0,


* Convert from Mmx to MmccForecast format

In [6]:
mmcc_forecast = convert_mmx_to_mmcc_forecast(mmx)
mmcc_forecast.head()

Unnamed: 0,t_dew_point,pressure,station_id,wind_speed,p_weather,t_air,precipitation_type,date_time_metro,cloudiness,precipitation_intensity,humidity,visibility,wind_direction,date_time_utc
14927,-5.4,769.1,38987,1.0,0,-3.3,0,2011-12-31 21:00 UTC,0.0,,85.0,4000.0,22.5,2011-12-31 21:00:00
14926,-6.8,768.8,38987,0.0,0,-4.2,0,2012-01-01 00:00 UTC,0.0,,82.0,4000.0,0.0,2012-01-01 00:00:00
14925,-5.6,767.7,38987,1.0,0,-3.5,0,2012-01-01 03:00 UTC,0.0,,85.0,4000.0,67.5,2012-01-01 03:00:00
14924,-4.8,766.5,38987,1.0,0,3.1,0,2012-01-01 06:00 UTC,0.0,,56.0,4000.0,90.0,2012-01-01 06:00:00
14923,-11.7,761.8,38987,3.0,0,15.8,0,2012-01-01 09:00 UTC,80.0,,14.0,4000.0,90.0,2012-01-01 09:00:00


## Convert Raw to Mmx

* Load Raw mm94 data

In [7]:
raw = load_mm94_stations(mm94_station_id)
raw.head()

Unnamed: 0,id,station_id,date_time,data,sensor_type_id,type,sensor_id,active_state
0,6996668,1821,2008-03-23 15:39:01,,17,cloudiness,957,A
1,6996669,1821,2008-03-23 15:39:01,926.0,4,dampness,953,A
2,6996670,1821,2008-03-23 15:39:01,-43.0,2,t_road,955,A
3,6996672,1821,2008-03-23 15:39:01,-64.0,13,dew_point,954,A
4,6996673,1821,2008-03-23 15:39:01,-54.0,1,t_air,919,A


* Convert Raw to Mmx

In [20]:
mmx = convert_raw_to_mmx(raw)
mmx.head()

['data_dew_point', 'date_time', 'data_pressure', 'station_id', 'data_wind_velocity', 'data_t_air', 'data_precip_code', 'data_t_road', 'data_precip_count', 'data_wind_dir', 'date_time_utc', 'data_cloudiness', 'data_p_weather', 'data_t_underroad', 'data_freezing_point', 'data_salinity', 'data_dampness', 'data_visibility', 'data_wind_speedmax']


Unnamed: 0,data_dew_point,date_time,data_pressure,station_id,data_wind_velocity,data_t_air,data_precip_code,data_t_road,data_precip_count,data_wind_dir,date_time_utc,data_cloudiness,data_p_weather,data_t_underroad,data_freezing_point,data_salinity,data_dampness,data_visibility,data_wind_speedmax
0,-6.4,2008-03-23 15:39:01,,1821,,-5.4,,-4.3,,,2008-03-23 12:39:01,,,-4.2,,,92.6,,
1,-5.7,2008-03-23 16:37:01,,1821,,-5.2,,-4.2,,,2008-03-23 13:37:01,,,-4.1,,,95.9,,
2,-5.0,2008-03-23 18:42:43,,1821,,-4.5,,-3.9,,,2008-03-23 15:42:43,,,-4.1,,,96.2,,
3,-3.0,2008-03-23 23:47:59,,1821,,-2.8,,-3.4,,,2008-03-23 20:47:59,,,-3.7,,,97.9,,
4,-2.1,2008-03-24 03:39:30,,1821,,-2.0,,-3.2,,,2008-03-24 00:39:30,,,-3.4,,,98.8,,


In [9]:
mypath = '/mnt/HARD/MinMax94/data/data_all/CSV/Raw_extended/'


raw_data = preprocessor.SelectFeatures(raw)
raw_data = preprocessor.PivotTable(raw_data)
mmx_data = preprocessor.ConvertData(raw_data, from_format="Raw", to_format="Mmx")
mmx_data = preprocessor.AddUTC(mmx_data)
mmx_patterns = preprocessor.CreatePatternList(mmx_data, max_gap = pd.Timedelta('4h'))
mmx_interpolated = preprocessor.InterpolatePatterns(mmx_patterns)

rwis_df = preprocessor.ConvertData(mmx_interpolated, from_format="Mmx", to_format="Metro")
rwis_df = rwis_df.set_index('date_time_utc')
rwis_df['salinity'] = 0
rwis_df['freezing_point'] = 0
del rwis_df['cloudiness'], rwis_df['station_id']

NameError: name 'preprocessor' is not defined

In [11]:
rwis_previous_delta = pd.Timedelta(12, unit='h')
rwis_data = rwis_df[(rwis_df.index >= pd.Timestamp(2015, 12, 30, 7, 0)) & \
                                (rwis_df.index <= pd.Timestamp(2015, 12, 30, 13, 0))]
rwis_data = rwis_data.set_index('date_time_metro', drop=True)
rwis_data_json = rwis_data.to_dict(orient='index')

## All together

In [12]:
import simplejson
test_path = '/home/ndsviriden/metro_test.txt'
working_path = '/home/ndsviriden/data_metro_1.txt'

data_for_metro = {"station_config": station_config, 
                  "road_config": road_config,
                  "global_forecast": global_forecast_json,
                  "rwis_data": rwis_data_json}

In [21]:
type(data_for_metro["global_forecast"]['2015-12-30 10:00 UTC']['cloudiness'])

float

In [16]:
data_for_metro["global_forecast"]

{'2015-12-30 10:00 UTC': {'cloudiness': 50.0,
  'humidity': 34.0,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 762.8,
  't_air': 16.8,
  't_dew_point': 0.8,
  'visibility': 4000.0,
  'wind_direction': 247.5,
  'wind_speed': 1.7},
 '2015-12-30 11:00 UTC': {'cloudiness': 60.0,
  'humidity': 35.0,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 761.9,
  't_air': 17.1,
  't_dew_point': 1.5,
  'visibility': 4000.0,
  'wind_direction': 270.0,
  'wind_speed': 1.3},
 '2015-12-30 13:00 UTC': {'cloudiness': 80.0,
  'humidity': 42.7,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pressure': 761.5,
  't_air': 15.0,
  't_dew_point': 2.0,
  'visibility': 4000.0,
  'wind_direction': 210.0,
  'wind_speed': 1.0},
 '2015-12-30 14:00 UTC': {'cloudiness': 80.0,
  'humidity': 49.3,
  'p_weather': 0.0,
  'precipitation_intensity': 0.0,
  'precipitation_type': 0.0,
  'pr

In [13]:
#------   
with open(working_path, "r") as file:
    working_data = json.load(file)

In [14]:
with open(test_path, "w") as file:
    simplejson.dump(data_for_metro, file, indent=4, ignore_nan=True)
    
with open(test_path, "r") as file:
    data_for_metro = json.load(file)

In [154]:
k1 = data_for_metro['global_forecast']['2015-12-30 10:00 UTC'].keys()
k2 = working_data['global_forecast']['2015-12-30 10:00 UTC'].keys()

In [32]:
new = '/home/ndsviriden/just_six.txt'
r = {"value": [{"1": 1, "2": 2}, {"3": 3}]}

with open(new, "w") as file:
    simplejson.dump(r, file, indent=4, ignore_nan=True)

In [159]:
data_for_metro['global_forecast']['2015-12-30 10:00 UTC']

{'cloudiness': 51.7,
 'humidity': 34.0,
 'p_weather': 0.0,
 'precipitation_intensity': 0.0,
 'precipitation_type': 0.0,
 'pressure': 762.8,
 't_air': 16.8,
 't_dew_point': 0.8,
 'visibility': 4000.0,
 'wind_direction': 247.5,
 'wind_speed': 1.7}