In [18]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_text
from sklearn.tree import export_graphviz
from sklearn.metrics import r2_score
import json
import csv

### Getting the differences

In [19]:
# Daily "softened" price series: read the tracker export and discard the
# statistics columns that are not used in this notebook.
df_soften = (
    pd.read_csv('../data/fromNftTracker/fluff.csv', sep=';')
    .drop(columns=['day', 'average', 'median', 'deviation', 'returns'])
)
df_soften

Unnamed: 0,date,soften
0,20210807,0.415000
1,20210808,0.400000
2,20210809,0.768333
3,20210810,0.722778
4,20210811,0.753704
...,...,...
403,20220917,2.091678
404,20220918,2.078794
405,20220919,1.997157
406,20220920,1.991984


In [20]:
# Individual sales log (semicolon-separated); the output below shows one row per
# sale with its date, price and tokenId.
df_price = pd.read_csv('../logger/transactions/databaseForTree.csv', sep=';')
df_price

Unnamed: 0,date,price,tokenId
0,20210807,0.4800,8834
1,20210808,0.5000,8869
2,20210808,1.0000,1737
3,20210808,0.1900,1671
4,20210808,0.2200,2972
...,...,...,...
16811,20221014,3.2900,3303
16812,20221014,2.2480,5710
16813,20221014,2.6231,8171
16814,20221014,1.9500,1130


In [21]:
# Attach to every sale the softened price of the same day.
# The join is inner (merge's default), so sales whose date has no softened
# value are dropped from df.
df = pd.merge(df_price, df_soften, on='date', how='inner')
df

Unnamed: 0,date,price,tokenId,soften
0,20210807,0.480,8834,0.415000
1,20210807,0.350,8873,0.415000
2,20210808,0.500,8869,0.400000
3,20210808,1.000,1737,0.400000
4,20210808,0.190,1671,0.400000
...,...,...,...,...
16644,20220920,1.705,5538,1.991984
16645,20220920,2.100,6057,1.991984
16646,20220920,2.050,37,1.991984
16647,20220920,2.550,9141,1.991984


In [22]:
# Relative deviation of each sale from the softened price of its day:
#     difference = (price / soften) - 1
# Computed column-wise; this yields the same values as a row-by-row apply
# without invoking a Python lambda once per sale.
df['difference'] = df['price'] / df['soften'] - 1
df

Unnamed: 0,date,price,tokenId,soften,difference
0,20210807,0.480,8834,0.415000,0.156627
1,20210807,0.350,8873,0.415000,-0.156627
2,20210808,0.500,8869,0.400000,0.250000
3,20210808,1.000,1737,0.400000,1.500000
4,20210808,0.190,1671,0.400000,-0.525000
...,...,...,...,...,...
16644,20220920,1.705,5538,1.991984,-0.144069
16645,20220920,2.100,6057,1.991984,0.054226
16646,20220920,2.050,37,1.991984,0.029125
16647,20220920,2.550,9141,1.991984,0.280131


In [23]:
# Keep only date, tokenId and difference for the modelling steps.
# Note: df is rebound in place, so re-running this cell on its own raises
# KeyError because 'price' and 'soften' are already gone.
df = df.drop(columns=['price','soften'])
df

Unnamed: 0,date,tokenId,difference
0,20210807,8834,0.156627
1,20210807,8873,-0.156627
2,20210808,8869,0.250000
3,20210808,1737,1.500000
4,20210808,1671,-0.525000
...,...,...,...
16644,20220920,5538,-0.144069
16645,20220920,6057,0.054226
16646,20220920,37,0.029125
16647,20220920,9141,0.280131


### Decision tree

In [24]:
# Per-token trait table. The stored index column and the cluster label are
# removed, and the fresh positional index is exposed as the 'TokenId' column.
# NOTE(review): this assumes row position in the CSV equals the token id — confirm.
df_pred = (
    pd.read_csv('../data/clusters/df_cluster.csv')
    .drop(columns=['Unnamed: 0', 'cluster'])
    .reset_index()
    .rename(columns={'index': 'TokenId'})
)
df_pred

Unnamed: 0,TokenId,Sex,Fur,Eyes,Eyewear,Top,Neck,Head,Mouth,Nose,Ears,Expression,Background
0,0,1,1,1,1,1,1,1,1,1,1,1,1
1,1,1,2,2,2,2,2,2,1,2,2,2,2
2,2,1,3,3,2,2,2,3,2,3,3,3,2
3,3,1,2,1,2,2,3,2,3,4,4,1,3
4,4,1,4,1,3,2,3,4,3,5,5,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,1,10,4,2,3,19,4,5,4,9,8,20
9996,9996,1,5,12,2,1,16,4,5,4,8,8,5
9997,9997,1,14,6,11,2,3,7,10,8,2,7,15
9998,9998,1,11,2,9,5,3,4,9,6,2,7,15


In [25]:
# Pair every sale with the full trait row (TokenId + traits) of its token.

# Build the TokenId -> trait-row lookup once, instead of re-filtering df_pred
# with a boolean mask for every single sale.
traits_by_token = {}
for token_id, traits in zip(df_pred['TokenId'], df_pred.values):
    # setdefault keeps the first row per TokenId, like the former `.values[0]`.
    traits_by_token.setdefault(int(token_id), traits)

prediction = []
for token_id, difference in zip(df['tokenId'], df['difference']):
    traits = traits_by_token.get(int(token_id))
    if traits is None:
        # Sale of a token with no trait row: skip it explicitly rather than
        # hiding every possible error behind a bare `except`.
        continue
    prediction.append({'Traits': traits, 'Difference': difference})

In [28]:
# Peek at the first record: the trait array starts with the TokenId, followed by the trait codes.
print(prediction[0])

{'Traits': array([8834,    2,    9,    8,    2,    6,    1,    8,   12,    4,    6,
          3,    8], dtype=int64), 'Difference': 0.15662650602409633}


##### Generating the CSV to feed the decision tree (all tokens)

In [30]:
# Dump the (Traits, Difference) records to CSV, one row per matched sale.
# NOTE(review): this path is relative to the notebook directory ('./data') while
# the inputs above are read from '../data' — confirm the target is intended.
with open('./data/test.csv', 'w', newline='') as out_file:
    csv_writer = csv.DictWriter(out_file, fieldnames=['Traits', 'Difference'])
    csv_writer.writeheader()
    for record in prediction:
        csv_writer.writerow(record)

### Decision Tree Testing


In [12]:
def format_to_decisiontree(x, y, n_traits=12):
    """Split one token row into the (features, target) pair fed to the tree.

    Parameters
    ----------
    x : sequence
        Token row; element 0 (the TokenId) is skipped and the following
        ``n_traits`` elements are taken as features.
    y : float
        Target value for that row (the price difference).
    n_traits : int, default 12
        Number of trait values that follow the TokenId in ``x``.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the list ``[x[1], ..., x[n_traits]]`` and
        ``Y`` is ``y`` unchanged.

    Raises
    ------
    IndexError
        If ``x`` holds fewer than ``n_traits + 1`` elements.
    """
    # Explicit indexing (not slicing) so a too-short row fails loudly.
    X = [x[i] for i in range(1, n_traits + 1)]
    return (X, y)

def format_to_prediction(x: list, n_traits: int = 12):
    """Extract the trait values of one token row as plain ints for ``predict``.

    Parameters
    ----------
    x : list
        Token row; element 0 (the TokenId) is skipped and the following
        ``n_traits`` elements are converted with ``int``.
    n_traits : int, default 12
        Number of trait values that follow the TokenId in ``x``.

    Returns
    -------
    list of int
        ``[int(x[1]), ..., int(x[n_traits])]``.

    Raises
    ------
    IndexError
        If ``x`` holds fewer than ``n_traits + 1`` elements.
    ValueError
        If a trait value cannot be converted to ``int``.
    """
    return [int(x[i]) for i in range(1, n_traits + 1)]

In [13]:
# Fit one regression tree on every sale: features are the token's trait codes,
# the target is the sale's relative deviation from the softened price.
# random_state is pinned so that re-running the notebook grows the same tree.
clf = DecisionTreeRegressor(random_state=0)

# TokenId -> trait row, built once instead of re-filtering df_pred per sale.
traits_by_token = {}
for token_id, traits in zip(df_pred['TokenId'], df_pred.values):
    # setdefault keeps the first row per TokenId, like the former `.values[0]`.
    traits_by_token.setdefault(int(token_id), traits)

X = []
Y = []
for token_id, difference in zip(df['tokenId'], df['difference']):
    traits = traits_by_token.get(int(token_id))
    if traits is None:
        # No trait row for this token: skip the sale explicitly rather than
        # swallowing every exception with a bare `except`.
        continue
    x_, y_ = format_to_decisiontree(traits, difference)
    X.append(x_)
    Y.append(y_)

clf.fit(X, Y)

In [14]:
# Sanity check of the fitted tree. The R² below is measured on the very samples
# the tree was fitted on, so it describes fit quality, not generalisation.
y_res = clf.predict(X)
r2 = r2_score(Y, y_res)

print(">>token characteristics", X[0], ">>prediction:", clf.predict([X[0]]))
print(Y[0])
print(">>r2:", r2)

>>token characteristics [2, 9, 8, 2, 6, 1, 8, 12, 4, 6, 3, 8] >>prediction: [0.065]
0.065
>>r2: 0.7873258739078599


In [15]:
# Trait columns only (TokenId removed); the column names of this frame are used to label the tree's features.
df_pred_simplified = df_pred.drop(columns=['TokenId'])

Visualizing Decision Tree

In [16]:
# Plain-text rendering of the fitted tree, with each split labelled by its trait name.
trait_names = df_pred_simplified.columns.tolist()
decision_tree_saved = export_text(clf, feature_names=trait_names)
print(decision_tree_saved)

|--- Nose <= 11.00
|   |--- Neck <= 19.50
|   |   |--- Sex <= 1.50
|   |   |   |--- Mouth <= 15.50
|   |   |   |   |--- Ears <= 1.50
|   |   |   |   |   |--- Head <= 1.50
|   |   |   |   |   |   |--- Background <= 12.00
|   |   |   |   |   |   |   |--- Eyes <= 5.00
|   |   |   |   |   |   |   |   |--- value: [9.78]
|   |   |   |   |   |   |   |--- Eyes >  5.00
|   |   |   |   |   |   |   |   |--- value: [18.42]
|   |   |   |   |   |   |--- Background >  12.00
|   |   |   |   |   |   |   |--- Top <= 5.00
|   |   |   |   |   |   |   |   |--- Eyes <= 7.50
|   |   |   |   |   |   |   |   |   |--- value: [0.35]
|   |   |   |   |   |   |   |   |--- Eyes >  7.50
|   |   |   |   |   |   |   |   |   |--- value: [0.70]
|   |   |   |   |   |   |   |--- Top >  5.00
|   |   |   |   |   |   |   |   |--- value: [1.44]
|   |   |   |   |   |--- Head >  1.50
|   |   |   |   |   |   |--- Expression <= 14.50
|   |   |   |   |   |   |   |--- Eyes <= 4.50
|   |   |   |   |   |   |   |   |--- Head <= 6.50
| 

Saving Decision Tree

In [33]:
import pickle

# Persist the fitted tree to disk as a pickle.
with open('my_dumped_classifier.dat', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [34]:
# Read the tree back from disk.
# NOTE: unpickling can execute code embedded in the file — only load dumps
# that were produced by this notebook or another trusted source.
with open('my_dumped_classifier.dat', 'rb') as model_file:
    clf_loaded = pickle.load(model_file)

In [47]:
# Display the reloaded estimator to confirm the round trip produced a model object.
clf_loaded

In [38]:
# Predict with the reloaded model on the first sample of X.
# NOTE(review): X is rebound by the per-cluster training cell further down, so
# the sample printed here depends on which cell last assigned X — run top to bottom.
print(">>token characteristics",X[0],">>prediction:",clf_loaded.predict([X[0]]))

>>token characteristics [2, 10, 2, 2, 1, 8, 9, 5, 7, 6, 6, 11] >>prediction: [0.79519076]


Other ways to save the model

In [40]:
from io import StringIO

# export_graphviz only returns the DOT text when out_file is None; when given a
# handle it writes into it and returns None. Assigning its return value to
# `out` therefore discarded the export, so read the text back from the buffer.
dot_buffer = StringIO()
export_graphviz(clf, out_file=dot_buffer)
out = dot_buffer.getvalue()
out

In [41]:
from inspect import getmembers
# print( getmembers( clf.tree_ ) )

Transforming the Python model to TensorFlow.js

### Decision tree per cluster

In [42]:
# Token ids belonging to cluster 0: the JSON-lines record is transposed into a
# single column, which is then named 'TokenId'.
df_cluster_0 = (
    pd.read_json('../data/clusters/cluster0.json', lines=True)
    .transpose()
    .rename(columns={0: 'TokenId'})
)
df_cluster_0

Unnamed: 0,TokenId
0,25
1,29
2,37
3,48
4,57
...,...
985,9979
986,9983
987,9994
988,9998


In [43]:
# Attach the trait columns to every cluster-0 token (merge's default inner join on TokenId).
df_cluster0 = df_cluster_0.merge(df_pred,on='TokenId')
df_cluster0

Unnamed: 0,TokenId,Sex,Fur,Eyes,Eyewear,Top,Neck,Head,Mouth,Nose,Ears,Expression,Background
0,25,2,9,6,2,6,3,4,5,6,8,8,11
1,29,1,8,6,2,2,5,4,6,2,3,7,12
2,37,1,9,4,2,1,3,8,6,9,6,7,14
3,48,1,12,7,2,2,8,5,5,2,6,8,20
4,57,2,8,8,3,5,5,4,5,4,6,9,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...
985,9979,1,8,12,2,1,3,1,5,9,6,10,15
986,9983,2,12,11,2,1,3,1,5,9,11,13,18
987,9994,2,8,5,2,5,3,5,5,9,8,9,18
988,9998,1,11,2,9,5,3,4,9,6,2,7,15


In [44]:
# Fit a regression tree restricted to sales of cluster-0 tokens.
# random_state is pinned so that re-running the notebook grows the same tree.
clf_cluster_0 = DecisionTreeRegressor(random_state=0)

# TokenId -> trait row for cluster 0, built once instead of re-filtering
# df_cluster0 with a boolean mask for every sale.
cluster0_traits_by_token = {}
for token_id, traits in zip(df_cluster0['TokenId'], df_cluster0.values):
    # setdefault keeps the first row per TokenId, like the former `.values[0]`.
    cluster0_traits_by_token.setdefault(int(token_id), traits)

X = []
Y = []
for token_id, difference in zip(df['tokenId'], df['difference']):
    traits = cluster0_traits_by_token.get(int(token_id))
    if traits is None:
        # Sale of a token outside cluster 0: skip it explicitly rather than
        # swallowing every exception with a bare `except`.
        continue
    x_, y_ = format_to_decisiontree(traits, difference)
    X.append(x_)
    Y.append(y_)

clf_cluster_0.fit(X, Y)

In [45]:
# Number of sales whose token was found in cluster 0 (one feature row is appended per matched sale).
len(X)

1761

In [46]:
# Sanity check of the cluster-0 tree. The R² below is measured on the same
# samples the tree was fitted on, so it describes fit quality, not generalisation.
y_res = clf_cluster_0.predict(X)
r2 = r2_score(Y, y_res)

print(">>token characteristics", X[0], ">>prediction:", clf_cluster_0.predict([X[0]]))
print(">>r2:", r2)

>>token characteristics [2, 10, 2, 2, 1, 8, 9, 5, 7, 6, 6, 11] >>prediction: [0.79519076]
>>r2: 0.8729335924400746
