In [10]:
from datetime import datetime

import pandas as pd
import numpy as np

import codecs, json 

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.model_selection import train_test_split

import xgboost as xgb

from sklearn.metrics import mean_squared_error

import scipy

In [2]:
######################### METHODS ###################################

In [2]:
def parse_data(filename):
    with open(filename) as json_file:
        json_data = json.load(json_file)
    
    frames = []
    for j in range(len(json_data['tables'])):
        df = pd.DataFrame(np.array(json_data['tables'][j]['table'])[:,:], 
                           columns = json_data['tables'][j]['header']['variables'][:])
        df['state'] = json_data['tables'][j]['header']['flightphase']
        if df['state'][0] == 'cruise':
            frames.append(df)
    
    df = pd.concat(frames,ignore_index=True)
    return df

In [4]:
def getInterpolation(temp, alt, mass, speed, objective, filename):
    df = parse_data(filename)
    df = df[['DISA','ALTITUDE','MASS','MACH', objective]]
    
    test = pd.concat([temp, alt, mass, speed, objective])
    df.append(test) # where to place points? or does not matter... (see previous tests)
    
    return ((df.interpolate()).iloc[test.index.values])[objective]
    

In [5]:
####################################################################

In [3]:
with open('/Users/calmaleh/Desktop/school/project_course/jeppesen/data_rich_ac.bsad') as json_file:
    json_data = json.load(json_file)

In [4]:
# json_data is a disc 
# The tables is a list and header is dict
# The tables list contains dict -> This dict contains the data
#json_data['tables'][1]['table']


In [5]:

frames = []
for j in range(len(json_data['tables'])):
    df = pd.DataFrame(np.array(json_data['tables'][j]['table'])[:,:], 
                           columns = json_data['tables'][j]['header']['variables'][:])
    df['state'] = json_data['tables'][j]['header']['flightphase']
    if df['state'][0] == 'cruise':
        frames.append(df)
    
df = pd.concat(frames,ignore_index=True)
df = df[['DISA','ALTITUDE','MASS','MACH','FUELFLOW']]

In [6]:
X = df.drop(['FUELFLOW'], axis=1)
y = df.FUELFLOW

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

#X_train.insert(4, 'FUELFLOW', y_train, False)
#X = X_train.append(X_test)
#X = X.sort_index(axis=0)
test = X_test.iloc[0]
y_check = y_test.iloc[0]


In [7]:
X_train['distance'] = X_train.sub(test).pow(2).sum(1).pow(0.5) # maybe cutoff based on input

end = 16
z = X_train.sort_values('distance').iloc[0:end]
z = z.drop(['distance'],axis = 1)
z = z.reset_index()
z.columns = ['index_', 'DISA', 'ALTITUDE', 'MASS', 'MACH']
y_train.columns = ['index_', 'FUELFLOW']
z = z.join(y_train, on = 'index_', how = 'left')
z = z.drop(['index_'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [8]:
z_inter = z[0:1].append(test)
z_inter = z_inter.append(z[2:end])
z_inter

y_inter = z_inter.interpolate(method = 'linear')['FUELFLOW']
print(y_inter)
print(y_inter.iloc[1])
print(y_check)
print(y_inter.iloc[1] - y_check)

0        0.383028
44933    0.385122
2        0.387215
3        0.390016
4        0.393398
5        0.400192
6        0.409123
7        0.436958
8        0.454090
9        0.457943
10       0.472972
11       0.513770
12       0.543467
13       0.543467
14       0.543467
15       0.543467
Name: FUELFLOW, dtype: float64
0.3851215
0.379251
0.005870500000000001


In [None]:
################### PANDAS METHOD ######################
# note: steps that involve the training/testing splits will have to be removed/modified in final version

In [9]:
def pandas_interpol(temp, alt, mass, speed, objective):
    point = pandas.Dataframe(data = {'DISA': [temp], 'ALTITUDE': [alt], 'MASS': [mass], 'MACH': [speed],
                                    objective: None})
    
    def sort_after_distances(dataset, point):
        dataset['distance'] = distance.sub(point).pow(2).sum(1).pow(0.5)    
        dist_sorted_dataset = dataset.sort_values('distance')     
        return dataset
    
    nbr_neighbours = 16 # number of closest neighbours
    df = sort_after_distances(X_train, point) # sort indices based on euclidean distance from point in question
    df = df[0:nbr_neighbours] # discard all but the closest points given by nbr_neighbours
    df = df.drop(['distance'],axis = 1) # drop distance vector
    df = df.reset_index()
    df.columns = ['index_', 'DISA', 'ALTITUDE', 'MASS', 'MACH']
    y_train.columns = ['index_', objective]
    df = df.join(y_train, on = 'index_', how = 'left') # add objective column
    df = df.drop(['index_'], axis=1)

    # add input point between closest and second closest neighbours in dataframe
    df_interp = df[0:1].append(test)
    df_interp = df_interp.append(df[2:end])
    
    # perform pandas' linear interpolation
    y_interp = df_interp.interpolate(method = 'linear')[objective]
    interpolated_objective = y_interp.iloc[1]
    
    return interpolated_objective