In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from kneed import KneeLocator
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [3]:
from sklearn.model_selection import cross_val_score, train_test_split, cross_val_predict

### Importing datasets

In [4]:
punk_attributes = pd.read_csv('raw_punks.csv')

In [5]:
sales = pd.read_csv('sales.csv')

### Getting "current price" dataset

In [6]:
### Getting current price

current_price = sales.groupby('punk_id').max().sort_values(by='amount_ethereum', ascending=False)

### Getting aggregated dataset to model

In [7]:
punk_attributes_price = pd.merge(punk_attributes, current_price, left_on='id', right_on='punk_id', how='left')

In [8]:
punk_attributes_price

Unnamed: 0,Unnamed: 0_x,id,types,rarity,skin,total_traits,traits,total_hidden_traits,hidden_traits,Unnamed: 0_y,Amount,From,To,Txn,Type,amount_ethereum,amount_dollars
0,0,0,Female,2023,Mid,3,"Blonde Bob,Earring,Green Eye Shadow",1,Earring,31.0,"25Ξ ($2,822)",0xf5099e,0xe08c32,"Nov 30, 2018",Sold,25.0,2822.0
1,1,1,Male,4352,Dark,2,"Mohawk,Smile",0,,80.0,"60Ξ ($36,305)",EliteCat…,GoWest23,"Nov 30, 2020",Sold,60.0,36305.0
2,2,2,Female,8090,Light,1,Wild Hair,0,,,,,,,,,
3,3,3,Male,7896,Dark,3,"Nerd Glasses,Pipe,Wild Hair",0,,,,,,,,,
4,4,4,Male,7332,Mid,4,"Big Shades,Earring,Goat,Wild Hair",0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,9995,Female,2466,Albino,2,"Purple Eye Shadow,Straight Hair Dark",0,,,,,,,,,
9996,9996,9996,Male,4323,Light,4,"Cigarette,Crazy Hair,Earring,Smile",0,,,,,,,,,
9997,9997,9997,Zombie,498,Zombie,2,"Cap Forward,Front Beard",0,,,,,,,,,
9998,9998,9998,Female,1271,Mid,3,"Black Lipstick,Clown Eyes Green,Wild White Hair",0,,,,,,,,,


### Creating new Human column used to replace Type and Skin 

In [9]:
condiciones = [punk_attributes_price['types']=='Zombie', punk_attributes_price['types']=='Alien', punk_attributes_price['types']=='Ape', punk_attributes_price['types']=='Male', punk_attributes_price['types']=='Female']
valores = [0, 0, 0, 1, 1]

punk_attributes_price['human'] = np.select(condiciones, valores) 

In [10]:
punk_attributes_price

Unnamed: 0,Unnamed: 0_x,id,types,rarity,skin,total_traits,traits,total_hidden_traits,hidden_traits,Unnamed: 0_y,Amount,From,To,Txn,Type,amount_ethereum,amount_dollars,human
0,0,0,Female,2023,Mid,3,"Blonde Bob,Earring,Green Eye Shadow",1,Earring,31.0,"25Ξ ($2,822)",0xf5099e,0xe08c32,"Nov 30, 2018",Sold,25.0,2822.0,1
1,1,1,Male,4352,Dark,2,"Mohawk,Smile",0,,80.0,"60Ξ ($36,305)",EliteCat…,GoWest23,"Nov 30, 2020",Sold,60.0,36305.0,1
2,2,2,Female,8090,Light,1,Wild Hair,0,,,,,,,,,,1
3,3,3,Male,7896,Dark,3,"Nerd Glasses,Pipe,Wild Hair",0,,,,,,,,,,1
4,4,4,Male,7332,Mid,4,"Big Shades,Earring,Goat,Wild Hair",0,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,9995,Female,2466,Albino,2,"Purple Eye Shadow,Straight Hair Dark",0,,,,,,,,,,1
9996,9996,9996,Male,4323,Light,4,"Cigarette,Crazy Hair,Earring,Smile",0,,,,,,,,,,1
9997,9997,9997,Zombie,498,Zombie,2,"Cap Forward,Front Beard",0,,,,,,,,,,0
9998,9998,9998,Female,1271,Mid,3,"Black Lipstick,Clown Eyes Green,Wild White Hair",0,,,,,,,,,,1


### Creating new ranges for total traits: 

if total traits:

    - values 2, 3, 4 & 5, assign 1
    - values 0, 1, 6 & 7, assign 2


In [11]:
condiciones_traits = [punk_attributes_price['total_traits']==2, punk_attributes_price['total_traits']==3, punk_attributes_price['total_traits']==4, punk_attributes_price['total_traits']==5, punk_attributes_price['total_traits']==0, punk_attributes_price['total_traits']==1, punk_attributes_price['total_traits']==6, punk_attributes_price['total_traits']==7]
valores_traits = [1, 1, 1, 1, 2, 2, 2, 2]

punk_attributes_price['traits_range'] = np.select(condiciones_traits, valores_traits) 

### Creating new ranges for total hidden traits: 

if total traits:
    - = 3, then assign 1
    - = 0 and 1, then assign 2
    - = 2 then assign 3

In [12]:
condiciones_hidden_traits = [punk_attributes_price['total_hidden_traits']==3, punk_attributes_price['total_hidden_traits']==0, punk_attributes_price['total_hidden_traits']==1, punk_attributes_price['total_hidden_traits']==2]
valores_hidden_traits = [1, 2, 2, 3]

punk_attributes_price['hidden_traits_range'] = np.select(condiciones_hidden_traits, valores_hidden_traits) 

### Dropping unnecessary columns

In [13]:
df_to_model = punk_attributes_price[['human', 'rarity', 'traits_range', 'hidden_traits_range', 'amount_ethereum']]

In [14]:
df_to_model

Unnamed: 0,human,rarity,traits_range,hidden_traits_range,amount_ethereum
0,1,2023,1,2,25.0
1,1,4352,1,2,60.0
2,1,8090,2,2,
3,1,7896,1,2,
4,1,7332,1,2,
...,...,...,...,...,...
9995,1,2466,1,2,
9996,1,4323,1,2,
9997,0,498,1,2,
9998,1,1271,1,2,


### Reducing sample to only Punks having registered Price

In [15]:
len(df_to_model)

10000

In [16]:
df_to_model = df_to_model[df_to_model['amount_ethereum'].notna()]

In [17]:
len(df_to_model)

5622

### Standardizing numerical values

In [18]:
scaler = StandardScaler()

In [19]:
numeric_variables = ['rarity', 'traits_range', 'hidden_traits_range', 'amount_ethereum']

In [20]:
df_to_model[numeric_variables] = scaler.fit_transform(df_to_model[numeric_variables])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [21]:
df_to_model

Unnamed: 0,human,rarity,traits_range,hidden_traits_range,amount_ethereum
0,1,-1.101820,-0.155661,-0.021784,0.036753
1,1,-0.287452,-0.155661,-0.021784,0.439804
14,1,-1.754993,-0.155661,-0.021784,-0.222351
33,1,0.860496,6.424226,-0.021784,-0.245267
53,1,-0.821389,-0.155661,-0.021784,0.002206
...,...,...,...,...,...
9973,1,0.334251,-0.155661,-0.021784,-0.068616
9974,1,-0.939226,-0.155661,-0.021784,-0.135983
9976,1,-1.061259,-0.155661,-0.021784,-0.233867
9979,1,0.835670,-0.155661,-0.021784,-0.179743


In [22]:
df_to_model.to_csv('df_to_model.csv')