In [1]:
!pip install sentence-transformers
from sentence_transformers import SentenceTransformer
model_sentence = SentenceTransformer("paraphrase-MiniLM-L6-v2")



  from .autonotebook import tqdm as notebook_tqdm


In [100]:
import joblib 
joblib.dump(model_sentence, 'model_sentence.pkl')

['model_sentence.pkl']

In [15]:
import torch
torch.save(model_sentence, 'model_torch_sentence.pth')

In [2]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [3]:
df_original = pd.read_csv('clustered_data_v2.csv')

In [4]:
pd.set_option('display.max_columns', None)

In [5]:
df_original.head()

Unnamed: 0,price,bedrooms,bathrooms,sqft_lot_normalized,floors,waterfront,view,condition,yr_built,yr_renovated,zipcode,lat,long,nearest_station_distance_km,bathroom_category,density,commute_time,distance_to_point_km,is_near_shore,grade_living_normalized,bedrooms_cat,lot_size_category,grade_living_category,outliers_ecod,cluster_all_data,cluster_no_outlier,sqft_lot,grade_living
0,221900.0,3,1.0,8.639588,1.0,0,0,3,1955,0,98178,47.5112,-122.257,8620.002287,Studio or 1 Bathroom,5364.0,32.9,0.595763,False,9.019301,3-4,0,0,0,1,1.0,5650,8260
1,538000.0,3,2.25,8.887791,2.0,0,0,3,1951,1991,98125,47.721,-122.319,2078.181042,2-3 Bathrooms,7999.0,28.9,3.038596,False,9.797627,3-4,1,2,0,0,0.0,7242,17990
2,180000.0,2,1.0,9.21044,1.0,0,0,3,1933,0,98028,47.7379,-122.233,8109.552186,Studio or 1 Bathroom,3833.0,33.1,2.317669,False,8.438366,<3,2,0,0,1,1.0,10000,4620
3,604000.0,4,3.0,8.517393,1.0,0,0,5,1965,0,98136,47.5208,-122.393,8918.318639,2-3 Bathrooms,7517.0,30.6,0.101668,True,9.526683,3-4,0,1,0,2,0.0,5000,13720
4,510000.0,3,2.0,8.997271,1.0,0,0,3,1987,0,98074,47.6168,-122.045,19789.800401,1-2 Bathrooms,2896.0,32.7,1.750656,False,9.506065,3-4,1,1,0,0,2.0,8080,13440


In [6]:
cluster_columns = ['bathroom_category', 'lot_size_category', 'floors','view', 'condition', 'is_near_shore','grade_living_category']

# XGBOOST

In [123]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

X = df_original[cluster_columns]
y = df_original['cluster_all_data']

# rename X columns for < and > signs
X.columns = X.columns.str.replace('<', 'less_than')
X.columns = X.columns.str.replace('>', 'greater_than')

numerical_cols = ['floors','view', 'condition']
categorical_cols = ['bathroom_category', 'lot_size_category','grade_living_category']

# Assume X is your features and y is your target


# Assume numerical_cols is a list of numerical column names, and categorical_cols is a list of categorical column names
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Preprocessing
X = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [124]:
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report


# Assuming embeddings and labels are your data and target variables respectively
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost model
model = xgb.XGBClassifier(objective='multi:softmax', eval_metric='logloss')  # Change objective to 'multi:softprob' for multi-class

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Accuracy: 0.9070090215128384
              precision    recall  f1-score   support

           0       0.90      0.98      0.94      3115
           1       1.00      1.00      1.00       781
           2       0.57      0.23      0.33       427

    accuracy                           0.91      4323
   macro avg       0.82      0.73      0.75      4323
weighted avg       0.89      0.91      0.89      4323



In [60]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

model = xgb.XGBClassifier(objective='multi:softmax', eval_metric='logloss') 

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print(f'Best parameters: {grid_search.best_params_}')
y_pred = grid_search.best_estimator_.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Fitting 5 folds for each of 243 candidates, totalling 1215 fits


Best parameters: {'colsample_bytree': 0.7, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Accuracy: 0.7957436965070552
              precision    recall  f1-score   support

           0       0.81      0.94      0.87      3115
           1       0.73      0.54      0.62       781
           2       0.63      0.22      0.33       427

    accuracy                           0.80      4323
   macro avg       0.72      0.57      0.61      4323
weighted avg       0.78      0.80      0.77      4323



# Neural Networks

In [74]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

X = df_original[cluster_columns]
y = df_original['cluster_all_data']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train.columns = X_train.columns.str.replace(r'[<>]', lambda x: 'less_than' if x.group() == '<' else 'greater_than', regex=True)
X_test.columns = X_test.columns.str.replace(r'[<>]', lambda x: 'less_than' if x.group() == '<' else 'greater_than', regex=True)


numerical_cols = ['floors','view', 'condition']
categorical_cols = ['bedrooms_cat', 'lot_size_category','grade_living_category']
# Assume X is your features and y is your target

# Assume numerical_cols is a list of numerical column names, and categorical_cols is a list of categorical column names
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder()

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Preprocessing
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

encoder = OneHotEncoder()
y_train = encoder.fit_transform(y_train.values.reshape(-1, 1)).toarray()
y_test = encoder.transform(y_test.values.reshape(-1, 1)).toarray()

# Convert one-hot encoded labels to 1D arrays
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)


print(X_train.shape, y_train.shape)



(17290, 12) (17290,)


In [84]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report


# Encode labels if they are categorical
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Build the model
model = Sequential()
model.add(Dense(128, input_dim=12, activation='relu'))  # Adjust input_dim to match the number of features
model.add(Dropout(0.5))  # Dropout layer to prevent overfitting
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))  # Output layer for multiclass classification with 3 clas


# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])  # Use 'categorical_crossentropy' for multi-class

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Predict
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

print(classification_report(y_test, y_pred))

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 829us/step - accuracy: 0.7321 - loss: 0.6838 - val_accuracy: 0.7741 - val_loss: 0.5185
Epoch 2/20
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 675us/step - accuracy: 0.7777 - loss: 0.5471 - val_accuracy: 0.7848 - val_loss: 0.5089
Epoch 3/20
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 770us/step - accuracy: 0.7879 - loss: 0.5218 - val_accuracy: 0.7828 - val_loss: 0.4994
Epoch 4/20
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 699us/step - accuracy: 0.7848 - loss: 0.5240 - val_accuracy: 0.7880 - val_loss: 0.4971
Epoch 5/20
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 644us/step - accuracy: 0.7785 - loss: 0.5212 - val_accuracy: 0.7796 - val_loss: 0.4966
Epoch 6/20
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step - accuracy: 0.7799 - loss: 0.5208 - val_accuracy: 0.7955 - val_loss: 0.4943
Epoch 7/20
[1m433/433[0m 

# LLM + XGBOOST

In [None]:
def compile_text(x):
    text =  f"""Bedrooms: {x['bedrooms_cat']}, 
                Lot Size (sqft): {x['lot_size_category']}, 
                Floors: {x['floors']}, 
                View: {x['view']}, 
                Condition: {x['condition']}, 
                Bathroom Category: {x['bathroom_category']}, 
                Near Shore: {x['is_near_shore']}, 
                Grade Living: {x['grade_living_category']}
            """
    return text

In [7]:
def compile_text(x):
    text =  f"""Bathroom Category: {x['bathroom_category']}, 
                Near Shore: {x['is_near_shore']}, 
            """
    return text

In [8]:
sentences = df_original.apply(lambda x: compile_text(x), axis=1).tolist()

In [9]:
output = model_sentence.encode(sentences=sentences, show_progress_bar= True, normalize_embeddings  = True)

Batches: 100%|██████████| 676/676 [00:12<00:00, 54.88it/s]


In [10]:
output[1].shape

(384,)

In [11]:
df_embedding = pd.DataFrame(output)

In [12]:
df_embedding

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383
0,0.001881,-0.047290,0.073722,0.011747,-0.027247,-0.016082,-0.058573,-0.029313,0.035192,0.044149,-0.002745,-0.102086,0.079029,0.046736,0.020035,-0.037034,0.045304,0.003064,0.005518,0.040733,0.048064,0.055127,-0.044484,0.020897,0.040543,-0.003770,-0.035720,0.044637,0.009856,-0.083843,-0.071919,-0.005554,-0.059270,-0.014939,0.065378,-0.027292,-0.052032,0.005903,0.012294,-0.033045,-0.041202,0.042874,-0.001613,0.038566,0.012130,0.063305,-0.043766,-0.000777,-0.012668,0.008737,0.054265,0.041289,-0.040580,0.100461,-0.067998,-0.044571,-0.099581,-0.015500,-0.081397,0.037531,-0.004924,0.049479,-0.087074,0.072380,-0.022246,0.095600,0.011049,0.051625,0.020084,0.003119,-0.035710,-0.007741,-0.083184,0.004589,0.043884,-0.039086,0.007067,0.058822,-0.043758,-0.022541,-0.041367,-0.054098,0.052061,0.045308,-0.030424,-0.030677,-0.041067,0.014950,-0.052664,-0.048059,-0.020601,-0.030324,0.011030,-0.047675,0.008113,-0.037856,-0.102813,0.027609,0.066042,0.004160,-0.046952,-0.019739,-0.016845,-0.038853,0.095002,-0.019060,0.030924,0.011776,-0.002012,0.048162,0.003633,-0.065250,0.008367,-0.068894,-0.018336,-0.027860,0.073091,-0.054308,-0.046734,0.001171,0.023461,-0.012699,0.008501,0.002351,-0.062857,-0.016105,-0.020568,0.019448,0.013467,-0.042975,0.029098,-0.024237,0.076241,0.048447,0.054392,0.069461,0.055439,0.003006,0.094572,0.004700,-0.074781,0.013517,0.008267,0.023986,-0.021836,0.057409,-0.083145,0.008059,-0.003399,0.067897,0.018654,0.085905,0.050988,-0.083571,-0.040075,-0.063140,-0.035657,0.068806,-0.036812,-0.015583,-0.003421,0.027547,-0.046234,-0.009961,-0.067576,0.005955,-0.058931,-0.090702,-0.003322,0.031617,0.058533,0.048530,0.121001,-0.008023,-0.028199,0.016124,-0.006789,0.009613,0.020967,0.085329,-0.083556,0.119142,0.005298,-0.041754,0.044433,0.026071,-0.017281,0.056710,-0.018535,0.119274,-0.039392,-0.068618,0.010662,0.021315,0.005878,0.053222,0.089602,-0.115902,-0.121481,-0.070433,-0.048345,0.074339,-0.006837,-0.018117,0.024900,-0.017777,0.012897,0.020066,-0.075821,0.052528,-0.016137,0.088185,-0.132549,-0.066378,0.040378,0.017280,-0.032639,-0.013031,0.018404,-0.004449,0.002405,-0.019861,0.011933,-0.056640,0.016069,-0.027150,-0.049485,0.088610,-0.010103,-0.000118,0.036867,-0.057338,0.000514,0.057461,-0.077601,0.086519,-0.005886,-0.012882,0.004315,0.054886,0.045777,-0.043640,-0.062697,0.178492,-0.027090,0.027352,0.016352,-0.033960,-0.008090,0.117862,-0.087594,0.040700,0.002433,0.048692,-0.103476,0.047403,-0.003210,-0.040199,0.026832,-0.044416,0.039274,0.017298,-0.089811,0.023662,0.092844,-0.062318,-0.062264,0.045853,-0.028575,-0.013734,-0.045252,-0.143896,-0.021359,0.025842,-0.069696,0.022741,-0.040644,-0.033262,0.024046,-0.106638,-0.016958,-0.053011,-0.061080,0.069121,0.087575,0.049147,0.008139,0.061070,0.032872,-0.014893,0.070636,0.082444,-0.065694,0.005419,-0.087427,-0.003309,0.087882,-0.038186,0.021625,0.037336,-0.014454,0.010613,-0.086073,0.039010,-0.088331,0.084498,-0.025872,0.078430,0.065417,0.009929,-0.012737,-0.007683,-0.003319,0.040620,0.017637,0.032409,0.000861,-0.033408,0.019888,0.005132,0.022628,0.118474,0.019484,-0.022411,-0.045812,-0.044929,-0.117390,0.046312,-0.057156,0.109774,0.032539,0.014033,0.035037,-0.036857,-0.000353,-0.011049,0.004956,-0.012156,-0.021163,-0.063705,-0.006343,0.012173,0.025496,-0.006829,-0.013482,0.019645,-0.044118,0.039574,-0.038442,0.005082,0.101648,0.013258,-0.053399,0.013822,0.012418,-0.032361,0.038526,-0.026542,-0.009807,-0.116941,-0.018092,-0.040861,-0.038600,0.036694,0.101301,0.047734,0.114759,0.031505,-0.074661,0.027318,-0.024822,0.022410,-0.034013,0.083949,0.046222,0.003524,-0.009101,-0.065740,-0.029793,0.071496,0.044120,0.006548,-0.031904,0.004339
1,0.019835,-0.035054,0.069809,0.043650,-0.013110,-0.011371,-0.016916,-0.063990,0.028327,0.012926,0.023116,-0.112367,0.109421,0.043949,0.005588,-0.042243,0.017976,0.017695,0.002774,0.026486,0.056448,0.029386,-0.062008,0.034393,0.019666,0.007466,-0.028781,0.069776,-0.039545,-0.091148,-0.069550,-0.012627,-0.091635,-0.013993,0.085297,-0.075184,-0.051855,-0.015434,0.014197,-0.013228,-0.028940,0.010345,0.001487,0.025445,-0.016213,0.106815,-0.042449,0.030779,0.010692,-0.029951,0.021119,0.080869,-0.026884,0.117054,-0.053842,-0.061098,-0.096989,-0.007509,-0.099874,0.010468,0.037034,0.098057,-0.057966,0.065498,-0.055329,0.102027,-0.016386,0.054989,0.037554,0.055724,-0.066108,0.013011,-0.101271,-0.016258,0.006946,-0.016060,-0.004095,0.074005,-0.033586,-0.031704,-0.026775,-0.027005,0.069770,0.090739,-0.014137,-0.073613,-0.044513,0.000478,-0.078885,-0.010320,0.033894,-0.011125,0.035347,-0.048338,0.006910,-0.001920,-0.117851,0.014010,0.071177,0.015991,-0.026057,-0.028561,0.009243,-0.050856,0.115084,0.004552,-0.001076,0.010158,0.028525,0.082525,-0.015744,-0.073808,0.021382,-0.045556,-0.070437,0.012988,0.056711,-0.061952,-0.010202,-0.010088,0.006309,-0.037821,0.017926,0.002020,-0.055270,0.010000,-0.036056,0.028603,0.041538,-0.038824,0.019524,-0.068023,0.038907,0.034894,0.057830,-0.005888,0.064100,0.017190,0.041746,0.020812,-0.087039,-0.036641,0.007943,0.031482,0.001370,0.069663,-0.058486,0.024331,0.007795,0.064902,0.035642,0.060710,0.035456,-0.069934,-0.041806,-0.039842,-0.073718,0.058371,0.001743,0.018565,0.022759,0.006943,-0.015078,-0.028360,-0.071037,0.028794,-0.050749,-0.087789,-0.039199,0.033148,0.060541,0.055571,0.107131,0.013661,-0.034562,-0.012855,-0.027922,0.011721,0.032832,0.058942,-0.077164,0.117130,-0.003079,-0.079974,0.032783,0.036354,-0.035715,0.033143,-0.042546,0.107512,-0.029955,-0.106096,0.033821,0.045430,0.010106,0.067896,0.074614,-0.095625,-0.074255,-0.049040,-0.001680,0.043142,0.016430,-0.034973,0.017718,-0.072033,0.020102,-0.014436,-0.076129,0.026779,-0.004614,0.144679,-0.058885,-0.087722,-0.007165,0.039987,-0.043847,-0.034480,0.034147,0.019036,-0.019015,0.005919,-0.003273,-0.053255,0.035816,0.003176,-0.021446,0.051300,-0.063839,-0.036505,0.039654,-0.006616,0.008359,0.039228,-0.100411,0.097927,0.040701,-0.047881,0.019117,0.068366,0.027408,-0.013801,-0.023776,0.165844,-0.002937,0.018866,0.019714,0.013244,-0.044919,0.143372,-0.023663,-0.012882,-0.016724,0.024929,-0.066268,0.011784,0.002597,-0.036128,0.004375,-0.033612,0.017279,0.027129,-0.082207,0.040723,0.101967,-0.057459,0.008873,0.014982,-0.021366,0.011887,-0.048988,-0.133764,-0.025228,0.064746,-0.077468,-0.012075,-0.068280,-0.016476,0.033559,-0.118348,-0.032470,0.005080,-0.073295,0.063889,0.054033,0.041005,-0.015600,0.089420,0.035970,0.003665,0.040188,0.004858,-0.062346,-0.010238,-0.085853,-0.066288,0.053871,-0.036417,0.061244,0.027678,-0.048346,0.046154,-0.057690,0.070157,-0.056114,0.069086,-0.024223,0.075576,0.083300,-0.012578,-0.024518,-0.014845,0.024538,0.075780,0.012028,0.037998,-0.014682,0.004698,0.013341,-0.004278,0.038549,0.119416,0.017928,-0.003109,-0.063752,-0.046652,-0.082631,0.073976,-0.007535,0.048541,0.029716,0.049166,0.039086,-0.033558,-0.003597,-0.017145,0.026276,-0.062210,0.003075,-0.069425,-0.027260,0.010704,-0.000358,0.027866,-0.000889,0.015120,0.002602,-0.004065,-0.056083,-0.006048,0.094322,-0.032578,-0.033833,0.040107,-0.045720,-0.039829,-0.011782,-0.016159,-0.014722,-0.128765,-0.046117,-0.053664,-0.052299,0.008150,0.059928,0.032431,0.078470,0.035391,-0.060662,0.044157,-0.027157,-0.002566,-0.059282,0.043885,0.005367,-0.027005,0.019347,-0.036204,0.022660,0.039447,0.060313,0.012248,-0.074269,0.026653
2,0.001881,-0.047290,0.073722,0.011747,-0.027247,-0.016082,-0.058573,-0.029313,0.035192,0.044149,-0.002745,-0.102086,0.079029,0.046736,0.020035,-0.037034,0.045304,0.003064,0.005518,0.040733,0.048064,0.055127,-0.044484,0.020897,0.040543,-0.003770,-0.035720,0.044637,0.009856,-0.083843,-0.071919,-0.005554,-0.059270,-0.014939,0.065378,-0.027292,-0.052032,0.005903,0.012294,-0.033045,-0.041202,0.042874,-0.001613,0.038566,0.012130,0.063305,-0.043766,-0.000777,-0.012668,0.008737,0.054265,0.041289,-0.040580,0.100461,-0.067998,-0.044571,-0.099581,-0.015500,-0.081397,0.037531,-0.004924,0.049479,-0.087074,0.072380,-0.022246,0.095600,0.011049,0.051625,0.020084,0.003119,-0.035710,-0.007741,-0.083184,0.004589,0.043884,-0.039086,0.007067,0.058822,-0.043758,-0.022541,-0.041367,-0.054098,0.052061,0.045308,-0.030424,-0.030677,-0.041067,0.014950,-0.052664,-0.048059,-0.020601,-0.030324,0.011030,-0.047675,0.008113,-0.037856,-0.102813,0.027609,0.066042,0.004160,-0.046952,-0.019739,-0.016845,-0.038853,0.095002,-0.019060,0.030924,0.011776,-0.002012,0.048162,0.003633,-0.065250,0.008367,-0.068894,-0.018336,-0.027860,0.073091,-0.054308,-0.046734,0.001171,0.023461,-0.012699,0.008501,0.002351,-0.062857,-0.016105,-0.020568,0.019448,0.013467,-0.042975,0.029098,-0.024237,0.076241,0.048447,0.054392,0.069461,0.055439,0.003006,0.094572,0.004700,-0.074781,0.013517,0.008267,0.023986,-0.021836,0.057409,-0.083145,0.008059,-0.003399,0.067897,0.018654,0.085905,0.050988,-0.083571,-0.040075,-0.063140,-0.035657,0.068806,-0.036812,-0.015583,-0.003421,0.027547,-0.046234,-0.009961,-0.067576,0.005955,-0.058931,-0.090702,-0.003322,0.031617,0.058533,0.048530,0.121001,-0.008023,-0.028199,0.016124,-0.006789,0.009613,0.020967,0.085329,-0.083556,0.119142,0.005298,-0.041754,0.044433,0.026071,-0.017281,0.056710,-0.018535,0.119274,-0.039392,-0.068618,0.010662,0.021315,0.005878,0.053222,0.089602,-0.115902,-0.121481,-0.070433,-0.048345,0.074339,-0.006837,-0.018117,0.024900,-0.017777,0.012897,0.020066,-0.075821,0.052528,-0.016137,0.088185,-0.132549,-0.066378,0.040378,0.017280,-0.032639,-0.013031,0.018404,-0.004449,0.002405,-0.019861,0.011933,-0.056640,0.016069,-0.027150,-0.049485,0.088610,-0.010103,-0.000118,0.036867,-0.057338,0.000514,0.057461,-0.077601,0.086519,-0.005886,-0.012882,0.004315,0.054886,0.045777,-0.043640,-0.062697,0.178492,-0.027090,0.027352,0.016352,-0.033960,-0.008090,0.117862,-0.087594,0.040700,0.002433,0.048692,-0.103476,0.047403,-0.003210,-0.040199,0.026832,-0.044416,0.039274,0.017298,-0.089811,0.023662,0.092844,-0.062318,-0.062264,0.045853,-0.028575,-0.013734,-0.045252,-0.143896,-0.021359,0.025842,-0.069696,0.022741,-0.040644,-0.033262,0.024046,-0.106638,-0.016958,-0.053011,-0.061080,0.069121,0.087575,0.049147,0.008139,0.061070,0.032872,-0.014893,0.070636,0.082444,-0.065694,0.005419,-0.087427,-0.003309,0.087882,-0.038186,0.021625,0.037336,-0.014454,0.010613,-0.086073,0.039010,-0.088331,0.084498,-0.025872,0.078430,0.065417,0.009929,-0.012737,-0.007683,-0.003319,0.040620,0.017637,0.032409,0.000861,-0.033408,0.019888,0.005132,0.022628,0.118474,0.019484,-0.022411,-0.045812,-0.044929,-0.117390,0.046312,-0.057156,0.109774,0.032539,0.014033,0.035037,-0.036857,-0.000353,-0.011049,0.004956,-0.012156,-0.021163,-0.063705,-0.006343,0.012173,0.025496,-0.006829,-0.013482,0.019645,-0.044118,0.039574,-0.038442,0.005082,0.101648,0.013258,-0.053399,0.013822,0.012418,-0.032361,0.038526,-0.026542,-0.009807,-0.116941,-0.018092,-0.040861,-0.038600,0.036694,0.101301,0.047734,0.114759,0.031505,-0.074661,0.027318,-0.024822,0.022410,-0.034013,0.083949,0.046222,0.003524,-0.009101,-0.065740,-0.029793,0.071496,0.044120,0.006548,-0.031904,0.004339
3,0.036104,-0.027240,0.063957,0.043260,-0.001791,-0.004958,-0.021011,-0.067288,0.023985,0.003641,0.004160,-0.109792,0.105730,0.049771,0.020479,-0.029343,0.023531,0.024451,0.006203,0.005993,0.032256,0.026684,-0.039929,0.040452,0.007316,0.004933,-0.015908,0.059093,-0.036557,-0.090135,-0.078560,-0.023306,-0.082854,-0.015557,0.063293,-0.072928,-0.042800,-0.026442,-0.004371,-0.024934,-0.027711,0.017675,0.005540,0.020314,-0.015110,0.103550,-0.051575,0.023336,0.014710,-0.010921,0.045350,0.074604,-0.013398,0.117569,-0.043156,-0.028384,-0.094422,-0.008690,-0.098515,0.000621,0.031959,0.097430,-0.063891,0.059132,-0.056836,0.088678,-0.015169,0.044935,0.005725,0.053307,-0.083843,-0.001881,-0.086125,-0.026262,0.011186,-0.015465,0.012635,0.056660,-0.035405,-0.020788,-0.029131,-0.018809,0.083765,0.091547,-0.018724,-0.074083,-0.042609,0.001269,-0.067686,-0.004597,0.038561,-0.007568,0.015591,-0.061709,0.006324,0.001696,-0.134273,0.004212,0.077929,0.019767,-0.030000,-0.031057,0.008562,-0.067323,0.124702,0.014060,-0.025447,0.026388,0.019158,0.073992,-0.013804,-0.058320,0.032305,-0.038782,-0.048534,0.016669,0.060239,-0.048740,0.000756,0.000692,-0.013334,-0.053533,0.021164,-0.019596,-0.053083,-0.004988,-0.015666,0.043779,0.057251,-0.045274,0.019537,-0.047896,0.044995,0.042162,0.053084,-0.007613,0.062382,0.002471,0.039521,0.016645,-0.084119,-0.032957,0.004136,0.030054,-0.004459,0.082735,-0.067162,0.030208,-0.003918,0.061451,0.035015,0.051196,0.054310,-0.087791,-0.014166,-0.012920,-0.059652,0.047477,0.007359,0.018303,-0.000596,0.004846,-0.011098,-0.043259,-0.068846,0.026744,-0.058946,-0.077995,-0.031668,0.044406,0.048835,0.042778,0.092850,0.019961,-0.047168,-0.023801,-0.024445,0.005461,0.035094,0.040246,-0.084231,0.108410,-0.016217,-0.084913,0.029692,0.033364,-0.030131,0.030006,-0.034990,0.110192,-0.002775,-0.123464,0.026843,0.049416,0.008615,0.084738,0.066860,-0.093357,-0.075641,-0.040754,0.003995,0.032993,0.039281,-0.037148,0.022402,-0.066372,0.030037,0.001299,-0.077018,0.026624,0.011288,0.141896,-0.041876,-0.091043,-0.007499,0.033303,-0.039009,-0.027223,0.027872,0.019763,-0.013469,0.001507,-0.001114,-0.061344,0.034882,-0.020031,-0.018246,0.067051,-0.047478,-0.037092,0.032481,-0.000936,0.003757,0.042584,-0.094316,0.083968,0.057133,-0.056799,0.043552,0.068739,0.032921,-0.014239,-0.021289,0.175739,-0.003480,0.007273,0.019501,0.022530,-0.043989,0.145082,-0.045529,-0.018308,-0.013510,0.017067,-0.085093,0.004443,-0.021111,-0.023110,0.001512,-0.054992,0.018476,0.025046,-0.071755,0.062865,0.094273,-0.070591,0.019251,0.017074,-0.037792,0.007909,-0.046531,-0.116275,-0.043647,0.065632,-0.069539,-0.009431,-0.074422,-0.008842,0.031023,-0.116366,-0.015754,-0.002553,-0.083269,0.084424,0.038026,0.034124,-0.000909,0.096909,0.035880,-0.006201,0.040237,0.002818,-0.075471,-0.020837,-0.073109,-0.074775,0.071590,-0.015222,0.065639,0.022896,-0.029950,0.028363,-0.067159,0.084792,-0.081447,0.057436,-0.032345,0.077123,0.079648,-0.015843,-0.037036,-0.030233,0.029869,0.056690,0.010251,0.043222,-0.014979,-0.001676,0.004884,-0.010625,0.037141,0.104176,0.019477,0.018654,-0.085816,-0.027392,-0.068016,0.102565,-0.004932,0.041786,0.042148,0.060584,0.031000,-0.026792,0.001922,-0.018730,0.062169,-0.062935,-0.001432,-0.048408,-0.024126,0.011265,0.026913,0.039781,0.001626,0.029034,-0.011520,-0.005915,-0.057777,-0.004329,0.096516,-0.040551,-0.053771,0.040810,-0.037849,-0.036120,-0.001662,-0.024468,-0.030522,-0.122640,-0.026504,-0.055294,-0.050259,0.010249,0.061247,0.034872,0.091252,0.045279,-0.080701,0.042647,-0.019980,-0.007992,-0.048391,0.030418,0.002326,-0.030960,0.029810,-0.062636,0.019803,0.034533,0.066490,0.011846,-0.068591,0.014373
4,0.019142,-0.009966,0.083020,0.059226,-0.012307,-0.004160,-0.009279,-0.040604,0.038387,0.009528,0.023852,-0.107390,0.108996,0.043977,0.001917,-0.051648,0.018300,0.006627,0.011676,0.019387,0.055234,0.038528,-0.078086,0.030915,0.011268,0.005607,-0.039898,0.088873,-0.024750,-0.080174,-0.057932,-0.009479,-0.109362,0.010215,0.084074,-0.074622,-0.024993,-0.000777,0.015988,-0.028726,-0.021852,0.003850,0.001579,0.025287,-0.003740,0.084518,-0.039409,0.031973,0.011456,-0.021681,0.037011,0.082420,-0.035461,0.118484,-0.049137,-0.043465,-0.096467,-0.015550,-0.099436,0.004000,0.017396,0.086425,-0.060902,0.074710,-0.066355,0.099929,-0.012262,0.053111,0.047214,0.037823,-0.061389,0.013144,-0.085469,-0.006815,0.006612,-0.016608,-0.024569,0.076233,-0.033237,-0.032165,-0.026002,-0.035342,0.067939,0.072233,-0.025725,-0.057173,-0.037230,0.003808,-0.060001,-0.015137,0.024340,-0.013240,0.055612,-0.045473,0.001871,-0.015489,-0.117270,0.024829,0.073778,0.013091,-0.035424,-0.048279,0.008805,-0.050106,0.122429,-0.016678,0.001191,-0.002268,0.036596,0.070881,-0.007854,-0.069502,0.024777,-0.041804,-0.064977,0.001543,0.072490,-0.047401,-0.022607,-0.017972,0.010405,-0.031051,0.016902,0.005390,-0.056011,0.002707,-0.028495,0.026556,0.037910,-0.034880,0.015717,-0.065456,0.046209,0.038240,0.056867,0.003093,0.068215,0.025739,0.054700,0.014671,-0.087851,-0.037646,0.019963,0.028968,0.012098,0.078704,-0.048828,0.020238,-0.001362,0.083886,0.032858,0.069608,0.048458,-0.080481,-0.051260,-0.043168,-0.069903,0.056797,-0.000538,0.017164,0.016167,0.016315,-0.020509,-0.040778,-0.057192,0.026186,-0.052333,-0.107856,-0.037405,0.035387,0.058646,0.049186,0.111162,-0.008217,-0.039463,-0.010876,-0.021684,-0.005336,0.026437,0.067046,-0.082520,0.115633,-0.004018,-0.084477,0.036192,0.035105,-0.053334,0.029053,-0.030826,0.111770,-0.025050,-0.113519,0.033941,0.028431,0.014730,0.048116,0.070168,-0.081730,-0.077634,-0.053111,-0.005990,0.024865,0.014925,-0.033916,0.052098,-0.064083,0.021036,-0.005010,-0.077555,0.018712,0.001798,0.147912,-0.069333,-0.073700,0.004239,0.050494,-0.054280,-0.031673,0.026201,0.015545,-0.030367,0.005968,0.006333,-0.053244,0.012833,0.010047,-0.025514,0.053292,-0.062995,-0.019817,0.048748,-0.019593,0.012503,0.042164,-0.090700,0.097082,0.035212,-0.043534,0.003804,0.051407,0.026944,0.007071,-0.022040,0.165414,-0.013389,0.021984,0.023856,-0.008072,-0.040484,0.137345,-0.023700,-0.014182,-0.021053,0.011586,-0.073382,0.026230,0.012172,-0.049244,-0.001095,-0.039864,0.006654,0.010018,-0.084974,0.055492,0.096434,-0.052835,0.010266,0.008373,-0.019504,-0.002244,-0.059342,-0.132586,-0.031428,0.045897,-0.092599,0.004111,-0.070950,-0.022626,0.035083,-0.115866,-0.027747,0.016394,-0.075019,0.060690,0.076076,0.039606,-0.027223,0.090119,0.033347,0.001044,0.049548,0.020870,-0.080472,-0.005850,-0.081926,-0.056721,0.072881,-0.036017,0.047342,0.032692,-0.039002,0.049503,-0.058697,0.062277,-0.075230,0.073547,-0.030614,0.063402,0.095650,-0.008866,-0.030061,-0.004879,0.028166,0.081771,0.013225,0.032041,-0.017616,-0.000176,0.017079,0.000147,0.026816,0.103652,0.017915,-0.011205,-0.046476,-0.053136,-0.093959,0.082098,-0.013590,0.053221,0.018600,0.056009,0.039602,-0.028182,-0.004856,-0.016784,0.030039,-0.053607,0.004276,-0.060119,-0.022347,-0.011791,-0.014232,0.019309,-0.015211,0.001186,-0.004290,-0.007742,-0.060215,-0.006297,0.089713,-0.040040,-0.021302,0.042711,-0.029516,-0.032443,-0.017238,-0.019681,-0.010436,-0.116634,-0.050745,-0.052421,-0.044409,0.007207,0.057489,0.050045,0.086229,0.046872,-0.058618,0.044954,-0.035791,-0.005010,-0.055170,0.044645,-0.008758,-0.022494,0.019186,-0.036540,0.010394,0.049242,0.055493,0.023778,-0.075720,0.023606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21608,0.019835,-0.035054,0.069809,0.043650,-0.013110,-0.011371,-0.016916,-0.063990,0.028327,0.012926,0.023116,-0.112367,0.109421,0.043949,0.005588,-0.042243,0.017976,0.017695,0.002774,0.026486,0.056448,0.029386,-0.062008,0.034393,0.019666,0.007466,-0.028781,0.069776,-0.039545,-0.091148,-0.069550,-0.012627,-0.091635,-0.013993,0.085297,-0.075184,-0.051855,-0.015434,0.014197,-0.013228,-0.028940,0.010345,0.001487,0.025445,-0.016213,0.106815,-0.042449,0.030779,0.010692,-0.029951,0.021119,0.080869,-0.026884,0.117054,-0.053842,-0.061098,-0.096989,-0.007509,-0.099874,0.010468,0.037034,0.098057,-0.057966,0.065498,-0.055329,0.102027,-0.016386,0.054989,0.037554,0.055724,-0.066108,0.013011,-0.101271,-0.016258,0.006946,-0.016060,-0.004095,0.074005,-0.033586,-0.031704,-0.026775,-0.027005,0.069770,0.090739,-0.014137,-0.073613,-0.044513,0.000478,-0.078885,-0.010320,0.033894,-0.011125,0.035347,-0.048338,0.006910,-0.001920,-0.117851,0.014010,0.071177,0.015991,-0.026057,-0.028561,0.009243,-0.050856,0.115084,0.004552,-0.001076,0.010158,0.028525,0.082525,-0.015744,-0.073808,0.021382,-0.045556,-0.070437,0.012988,0.056711,-0.061952,-0.010202,-0.010088,0.006309,-0.037821,0.017926,0.002020,-0.055270,0.010000,-0.036056,0.028603,0.041538,-0.038824,0.019524,-0.068023,0.038907,0.034894,0.057830,-0.005888,0.064100,0.017190,0.041746,0.020812,-0.087039,-0.036641,0.007943,0.031482,0.001370,0.069663,-0.058486,0.024331,0.007795,0.064902,0.035642,0.060710,0.035456,-0.069934,-0.041806,-0.039842,-0.073718,0.058371,0.001743,0.018565,0.022759,0.006943,-0.015078,-0.028360,-0.071037,0.028794,-0.050749,-0.087789,-0.039199,0.033148,0.060541,0.055571,0.107131,0.013661,-0.034562,-0.012855,-0.027922,0.011721,0.032832,0.058942,-0.077164,0.117130,-0.003079,-0.079974,0.032783,0.036354,-0.035715,0.033143,-0.042546,0.107512,-0.029955,-0.106096,0.033821,0.045430,0.010106,0.067896,0.074614,-0.095625,-0.074255,-0.049040,-0.001680,0.043142,0.016430,-0.034973,0.017718,-0.072033,0.020102,-0.014436,-0.076129,0.026779,-0.004614,0.144679,-0.058885,-0.087722,-0.007165,0.039987,-0.043847,-0.034480,0.034147,0.019036,-0.019015,0.005919,-0.003273,-0.053255,0.035816,0.003176,-0.021446,0.051300,-0.063839,-0.036505,0.039654,-0.006616,0.008359,0.039228,-0.100411,0.097927,0.040701,-0.047881,0.019117,0.068366,0.027408,-0.013801,-0.023776,0.165844,-0.002937,0.018866,0.019714,0.013244,-0.044919,0.143372,-0.023663,-0.012882,-0.016724,0.024929,-0.066268,0.011784,0.002597,-0.036128,0.004375,-0.033612,0.017279,0.027129,-0.082207,0.040723,0.101967,-0.057459,0.008873,0.014982,-0.021366,0.011887,-0.048988,-0.133764,-0.025228,0.064746,-0.077468,-0.012075,-0.068280,-0.016476,0.033559,-0.118348,-0.032470,0.005080,-0.073295,0.063889,0.054033,0.041005,-0.015600,0.089420,0.035970,0.003665,0.040188,0.004858,-0.062346,-0.010238,-0.085853,-0.066288,0.053871,-0.036417,0.061244,0.027678,-0.048346,0.046154,-0.057690,0.070157,-0.056114,0.069086,-0.024223,0.075576,0.083300,-0.012578,-0.024518,-0.014845,0.024538,0.075780,0.012028,0.037998,-0.014682,0.004698,0.013341,-0.004278,0.038549,0.119416,0.017928,-0.003109,-0.063752,-0.046652,-0.082631,0.073976,-0.007535,0.048541,0.029716,0.049166,0.039086,-0.033558,-0.003597,-0.017145,0.026276,-0.062210,0.003075,-0.069425,-0.027260,0.010704,-0.000358,0.027866,-0.000889,0.015120,0.002602,-0.004065,-0.056083,-0.006048,0.094322,-0.032578,-0.033833,0.040107,-0.045720,-0.039829,-0.011782,-0.016159,-0.014722,-0.128765,-0.046117,-0.053664,-0.052299,0.008150,0.059928,0.032431,0.078470,0.035391,-0.060662,0.044157,-0.027157,-0.002566,-0.059282,0.043885,0.005367,-0.027005,0.019347,-0.036204,0.022660,0.039447,0.060313,0.012248,-0.074269,0.026653
21609,0.019835,-0.035054,0.069809,0.043650,-0.013110,-0.011371,-0.016916,-0.063990,0.028327,0.012926,0.023116,-0.112367,0.109421,0.043949,0.005588,-0.042243,0.017976,0.017695,0.002774,0.026486,0.056448,0.029386,-0.062008,0.034393,0.019666,0.007466,-0.028781,0.069776,-0.039545,-0.091148,-0.069550,-0.012627,-0.091635,-0.013993,0.085297,-0.075184,-0.051855,-0.015434,0.014197,-0.013228,-0.028940,0.010345,0.001487,0.025445,-0.016213,0.106815,-0.042449,0.030779,0.010692,-0.029951,0.021119,0.080869,-0.026884,0.117054,-0.053842,-0.061098,-0.096989,-0.007509,-0.099874,0.010468,0.037034,0.098057,-0.057966,0.065498,-0.055329,0.102027,-0.016386,0.054989,0.037554,0.055724,-0.066108,0.013011,-0.101271,-0.016258,0.006946,-0.016060,-0.004095,0.074005,-0.033586,-0.031704,-0.026775,-0.027005,0.069770,0.090739,-0.014137,-0.073613,-0.044513,0.000478,-0.078885,-0.010320,0.033894,-0.011125,0.035347,-0.048338,0.006910,-0.001920,-0.117851,0.014010,0.071177,0.015991,-0.026057,-0.028561,0.009243,-0.050856,0.115084,0.004552,-0.001076,0.010158,0.028525,0.082525,-0.015744,-0.073808,0.021382,-0.045556,-0.070437,0.012988,0.056711,-0.061952,-0.010202,-0.010088,0.006309,-0.037821,0.017926,0.002020,-0.055270,0.010000,-0.036056,0.028603,0.041538,-0.038824,0.019524,-0.068023,0.038907,0.034894,0.057830,-0.005888,0.064100,0.017190,0.041746,0.020812,-0.087039,-0.036641,0.007943,0.031482,0.001370,0.069663,-0.058486,0.024331,0.007795,0.064902,0.035642,0.060710,0.035456,-0.069934,-0.041806,-0.039842,-0.073718,0.058371,0.001743,0.018565,0.022759,0.006943,-0.015078,-0.028360,-0.071037,0.028794,-0.050749,-0.087789,-0.039199,0.033148,0.060541,0.055571,0.107131,0.013661,-0.034562,-0.012855,-0.027922,0.011721,0.032832,0.058942,-0.077164,0.117130,-0.003079,-0.079974,0.032783,0.036354,-0.035715,0.033143,-0.042546,0.107512,-0.029955,-0.106096,0.033821,0.045430,0.010106,0.067896,0.074614,-0.095625,-0.074255,-0.049040,-0.001680,0.043142,0.016430,-0.034973,0.017718,-0.072033,0.020102,-0.014436,-0.076129,0.026779,-0.004614,0.144679,-0.058885,-0.087722,-0.007165,0.039987,-0.043847,-0.034480,0.034147,0.019036,-0.019015,0.005919,-0.003273,-0.053255,0.035816,0.003176,-0.021446,0.051300,-0.063839,-0.036505,0.039654,-0.006616,0.008359,0.039228,-0.100411,0.097927,0.040701,-0.047881,0.019117,0.068366,0.027408,-0.013801,-0.023776,0.165844,-0.002937,0.018866,0.019714,0.013244,-0.044919,0.143372,-0.023663,-0.012882,-0.016724,0.024929,-0.066268,0.011784,0.002597,-0.036128,0.004375,-0.033612,0.017279,0.027129,-0.082207,0.040723,0.101967,-0.057459,0.008873,0.014982,-0.021366,0.011887,-0.048988,-0.133764,-0.025228,0.064746,-0.077468,-0.012075,-0.068280,-0.016476,0.033559,-0.118348,-0.032470,0.005080,-0.073295,0.063889,0.054033,0.041005,-0.015600,0.089420,0.035970,0.003665,0.040188,0.004858,-0.062346,-0.010238,-0.085853,-0.066288,0.053871,-0.036417,0.061244,0.027678,-0.048346,0.046154,-0.057690,0.070157,-0.056114,0.069086,-0.024223,0.075576,0.083300,-0.012578,-0.024518,-0.014845,0.024538,0.075780,0.012028,0.037998,-0.014682,0.004698,0.013341,-0.004278,0.038549,0.119416,0.017928,-0.003109,-0.063752,-0.046652,-0.082631,0.073976,-0.007535,0.048541,0.029716,0.049166,0.039086,-0.033558,-0.003597,-0.017145,0.026276,-0.062210,0.003075,-0.069425,-0.027260,0.010704,-0.000358,0.027866,-0.000889,0.015120,0.002602,-0.004065,-0.056083,-0.006048,0.094322,-0.032578,-0.033833,0.040107,-0.045720,-0.039829,-0.011782,-0.016159,-0.014722,-0.128765,-0.046117,-0.053664,-0.052299,0.008150,0.059928,0.032431,0.078470,0.035391,-0.060662,0.044157,-0.027157,-0.002566,-0.059282,0.043885,0.005367,-0.027005,0.019347,-0.036204,0.022660,0.039447,0.060313,0.012248,-0.074269,0.026653
21610,0.001881,-0.047290,0.073722,0.011747,-0.027247,-0.016082,-0.058573,-0.029313,0.035192,0.044149,-0.002745,-0.102086,0.079029,0.046736,0.020035,-0.037034,0.045304,0.003064,0.005518,0.040733,0.048064,0.055127,-0.044484,0.020897,0.040543,-0.003770,-0.035720,0.044637,0.009856,-0.083843,-0.071919,-0.005554,-0.059270,-0.014939,0.065378,-0.027292,-0.052032,0.005903,0.012294,-0.033045,-0.041202,0.042874,-0.001613,0.038566,0.012130,0.063305,-0.043766,-0.000777,-0.012668,0.008737,0.054265,0.041289,-0.040580,0.100461,-0.067998,-0.044571,-0.099581,-0.015500,-0.081397,0.037531,-0.004924,0.049479,-0.087074,0.072380,-0.022246,0.095600,0.011049,0.051625,0.020084,0.003119,-0.035710,-0.007741,-0.083184,0.004589,0.043884,-0.039086,0.007067,0.058822,-0.043758,-0.022541,-0.041367,-0.054098,0.052061,0.045308,-0.030424,-0.030677,-0.041067,0.014950,-0.052664,-0.048059,-0.020601,-0.030324,0.011030,-0.047675,0.008113,-0.037856,-0.102813,0.027609,0.066042,0.004160,-0.046952,-0.019739,-0.016845,-0.038853,0.095002,-0.019060,0.030924,0.011776,-0.002012,0.048162,0.003633,-0.065250,0.008367,-0.068894,-0.018336,-0.027860,0.073091,-0.054308,-0.046734,0.001171,0.023461,-0.012699,0.008501,0.002351,-0.062857,-0.016105,-0.020568,0.019448,0.013467,-0.042975,0.029098,-0.024237,0.076241,0.048447,0.054392,0.069461,0.055439,0.003006,0.094572,0.004700,-0.074781,0.013517,0.008267,0.023986,-0.021836,0.057409,-0.083145,0.008059,-0.003399,0.067897,0.018654,0.085905,0.050988,-0.083571,-0.040075,-0.063140,-0.035657,0.068806,-0.036812,-0.015583,-0.003421,0.027547,-0.046234,-0.009961,-0.067576,0.005955,-0.058931,-0.090702,-0.003322,0.031617,0.058533,0.048530,0.121001,-0.008023,-0.028199,0.016124,-0.006789,0.009613,0.020967,0.085329,-0.083556,0.119142,0.005298,-0.041754,0.044433,0.026071,-0.017281,0.056710,-0.018535,0.119274,-0.039392,-0.068618,0.010662,0.021315,0.005878,0.053222,0.089602,-0.115902,-0.121481,-0.070433,-0.048345,0.074339,-0.006837,-0.018117,0.024900,-0.017777,0.012897,0.020066,-0.075821,0.052528,-0.016137,0.088185,-0.132549,-0.066378,0.040378,0.017280,-0.032639,-0.013031,0.018404,-0.004449,0.002405,-0.019861,0.011933,-0.056640,0.016069,-0.027150,-0.049485,0.088610,-0.010103,-0.000118,0.036867,-0.057338,0.000514,0.057461,-0.077601,0.086519,-0.005886,-0.012882,0.004315,0.054886,0.045777,-0.043640,-0.062697,0.178492,-0.027090,0.027352,0.016352,-0.033960,-0.008090,0.117862,-0.087594,0.040700,0.002433,0.048692,-0.103476,0.047403,-0.003210,-0.040199,0.026832,-0.044416,0.039274,0.017298,-0.089811,0.023662,0.092844,-0.062318,-0.062264,0.045853,-0.028575,-0.013734,-0.045252,-0.143896,-0.021359,0.025842,-0.069696,0.022741,-0.040644,-0.033262,0.024046,-0.106638,-0.016958,-0.053011,-0.061080,0.069121,0.087575,0.049147,0.008139,0.061070,0.032872,-0.014893,0.070636,0.082444,-0.065694,0.005419,-0.087427,-0.003309,0.087882,-0.038186,0.021625,0.037336,-0.014454,0.010613,-0.086073,0.039010,-0.088331,0.084498,-0.025872,0.078430,0.065417,0.009929,-0.012737,-0.007683,-0.003319,0.040620,0.017637,0.032409,0.000861,-0.033408,0.019888,0.005132,0.022628,0.118474,0.019484,-0.022411,-0.045812,-0.044929,-0.117390,0.046312,-0.057156,0.109774,0.032539,0.014033,0.035037,-0.036857,-0.000353,-0.011049,0.004956,-0.012156,-0.021163,-0.063705,-0.006343,0.012173,0.025496,-0.006829,-0.013482,0.019645,-0.044118,0.039574,-0.038442,0.005082,0.101648,0.013258,-0.053399,0.013822,0.012418,-0.032361,0.038526,-0.026542,-0.009807,-0.116941,-0.018092,-0.040861,-0.038600,0.036694,0.101301,0.047734,0.114759,0.031505,-0.074661,0.027318,-0.024822,0.022410,-0.034013,0.083949,0.046222,0.003524,-0.009101,-0.065740,-0.029793,0.071496,0.044120,0.006548,-0.031904,0.004339
21611,0.019835,-0.035054,0.069809,0.043650,-0.013110,-0.011371,-0.016916,-0.063990,0.028327,0.012926,0.023116,-0.112367,0.109421,0.043949,0.005588,-0.042243,0.017976,0.017695,0.002774,0.026486,0.056448,0.029386,-0.062008,0.034393,0.019666,0.007466,-0.028781,0.069776,-0.039545,-0.091148,-0.069550,-0.012627,-0.091635,-0.013993,0.085297,-0.075184,-0.051855,-0.015434,0.014197,-0.013228,-0.028940,0.010345,0.001487,0.025445,-0.016213,0.106815,-0.042449,0.030779,0.010692,-0.029951,0.021119,0.080869,-0.026884,0.117054,-0.053842,-0.061098,-0.096989,-0.007509,-0.099874,0.010468,0.037034,0.098057,-0.057966,0.065498,-0.055329,0.102027,-0.016386,0.054989,0.037554,0.055724,-0.066108,0.013011,-0.101271,-0.016258,0.006946,-0.016060,-0.004095,0.074005,-0.033586,-0.031704,-0.026775,-0.027005,0.069770,0.090739,-0.014137,-0.073613,-0.044513,0.000478,-0.078885,-0.010320,0.033894,-0.011125,0.035347,-0.048338,0.006910,-0.001920,-0.117851,0.014010,0.071177,0.015991,-0.026057,-0.028561,0.009243,-0.050856,0.115084,0.004552,-0.001076,0.010158,0.028525,0.082525,-0.015744,-0.073808,0.021382,-0.045556,-0.070437,0.012988,0.056711,-0.061952,-0.010202,-0.010088,0.006309,-0.037821,0.017926,0.002020,-0.055270,0.010000,-0.036056,0.028603,0.041538,-0.038824,0.019524,-0.068023,0.038907,0.034894,0.057830,-0.005888,0.064100,0.017190,0.041746,0.020812,-0.087039,-0.036641,0.007943,0.031482,0.001370,0.069663,-0.058486,0.024331,0.007795,0.064902,0.035642,0.060710,0.035456,-0.069934,-0.041806,-0.039842,-0.073718,0.058371,0.001743,0.018565,0.022759,0.006943,-0.015078,-0.028360,-0.071037,0.028794,-0.050749,-0.087789,-0.039199,0.033148,0.060541,0.055571,0.107131,0.013661,-0.034562,-0.012855,-0.027922,0.011721,0.032832,0.058942,-0.077164,0.117130,-0.003079,-0.079974,0.032783,0.036354,-0.035715,0.033143,-0.042546,0.107512,-0.029955,-0.106096,0.033821,0.045430,0.010106,0.067896,0.074614,-0.095625,-0.074255,-0.049040,-0.001680,0.043142,0.016430,-0.034973,0.017718,-0.072033,0.020102,-0.014436,-0.076129,0.026779,-0.004614,0.144679,-0.058885,-0.087722,-0.007165,0.039987,-0.043847,-0.034480,0.034147,0.019036,-0.019015,0.005919,-0.003273,-0.053255,0.035816,0.003176,-0.021446,0.051300,-0.063839,-0.036505,0.039654,-0.006616,0.008359,0.039228,-0.100411,0.097927,0.040701,-0.047881,0.019117,0.068366,0.027408,-0.013801,-0.023776,0.165844,-0.002937,0.018866,0.019714,0.013244,-0.044919,0.143372,-0.023663,-0.012882,-0.016724,0.024929,-0.066268,0.011784,0.002597,-0.036128,0.004375,-0.033612,0.017279,0.027129,-0.082207,0.040723,0.101967,-0.057459,0.008873,0.014982,-0.021366,0.011887,-0.048988,-0.133764,-0.025228,0.064746,-0.077468,-0.012075,-0.068280,-0.016476,0.033559,-0.118348,-0.032470,0.005080,-0.073295,0.063889,0.054033,0.041005,-0.015600,0.089420,0.035970,0.003665,0.040188,0.004858,-0.062346,-0.010238,-0.085853,-0.066288,0.053871,-0.036417,0.061244,0.027678,-0.048346,0.046154,-0.057690,0.070157,-0.056114,0.069086,-0.024223,0.075576,0.083300,-0.012578,-0.024518,-0.014845,0.024538,0.075780,0.012028,0.037998,-0.014682,0.004698,0.013341,-0.004278,0.038549,0.119416,0.017928,-0.003109,-0.063752,-0.046652,-0.082631,0.073976,-0.007535,0.048541,0.029716,0.049166,0.039086,-0.033558,-0.003597,-0.017145,0.026276,-0.062210,0.003075,-0.069425,-0.027260,0.010704,-0.000358,0.027866,-0.000889,0.015120,0.002602,-0.004065,-0.056083,-0.006048,0.094322,-0.032578,-0.033833,0.040107,-0.045720,-0.039829,-0.011782,-0.016159,-0.014722,-0.128765,-0.046117,-0.053664,-0.052299,0.008150,0.059928,0.032431,0.078470,0.035391,-0.060662,0.044157,-0.027157,-0.002566,-0.059282,0.043885,0.005367,-0.027005,0.019347,-0.036204,0.022660,0.039447,0.060313,0.012248,-0.074269,0.026653


In [13]:
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

X = df_embedding.copy()
y = df_original['cluster_all_data']

# Assuming embeddings and labels are your data and target variables respectively
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an XGBoost model
model = xgb.XGBClassifier(objective='multi:softmax', eval_metric='logloss')  # Change objective to 'multi:softprob' for multi-class

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Accuracy: 0.9990747166319686
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3115
           1       0.99      1.00      1.00       781
           2       1.00      0.99      1.00       427

    accuracy                           1.00      4323
   macro avg       1.00      1.00      1.00      4323
weighted avg       1.00      1.00      1.00      4323



In [14]:
import torch
torch.save(model, "model_classification__torch.pth")


In [132]:
import joblib 
joblib.dump(model, 'model_classification_xgboost.pkl')


['classification_xgboost_model.pkl']

In [133]:
# import xgboost as xgb
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.metrics import accuracy_score, classification_report

# # Assuming embeddings and labels are your data and target variables respectively
# X = df_embedding.copy()
# y = df_original['cluster_all_data']

# # Split the data into training and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Create an XGBoost model
# model = xgb.XGBClassifier(objective='multi:softmax', eval_metric='mlogloss', use_label_encoder=False)

# # Define the parameter grid for hyperparameter tuning
# param_grid = {
#     'n_estimators': [100, 200, 300],
#     'max_depth': [3, 5, 7],
#     'learning_rate': [0.01, 0.1, 0.2],
#     'subsample': [0.8, 1.0],
#     'colsample_bytree': [0.8, 1.0]
# }

# # Create a GridSearchCV object
# grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)

# # Fit the model using grid search
# grid_search.fit(X_train, y_train)

# # Get the best model from grid search
# best_model = grid_search.best_estimator_

# # Print the best parameters
# print("Best parameters found: ", grid_search.best_params_)

# # Predict using the best model
# y_pred = best_model.predict(X_test)

# # Evaluation
# print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
# print(classification_report(y_test, y_pred))


# LLM + Neural Networks

In [134]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

X = df_embedding.copy()
y = df_original['cluster_all_data']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

encoder = OneHotEncoder()
y_train = encoder.fit_transform(y_train.values.reshape(-1, 1)).toarray()
y_test = encoder.transform(y_test.values.reshape(-1, 1)).toarray()

# Convert one-hot encoded labels to 1D arrays
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

print(X_train.shape, y_train.shape)


(17290, 384) (17290,)


In [135]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report


# Encode labels if they are categorical
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Build the model
model = Sequential()
model.add(Dense(128, input_dim=384, activation='relu'))  # Adjust input_dim to match the number of features
model.add(Dropout(0.5))  # Dropout layer to prevent overfitting
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))  # Output layer for multiclass classification with 3 clas


# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])  # Use 'categorical_crossentropy' for multi-class

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=0)

# Predict
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

print(classification_report(y_test, y_pred))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 777us/step
Accuracy: 0.9990747166319686
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3115
           1       0.99      1.00      1.00       781
           2       1.00      0.99      1.00       427

    accuracy                           1.00      4323
   macro avg       1.00      1.00      1.00      4323
weighted avg       1.00      1.00      1.00      4323

