In [1]:
import pandas as pd
import numpy as np
import pickle
from itertools import compress

In [2]:
# Load the raw item DataFrame. pickle.load can execute arbitrary code, so only
# point this at pickles produced by this project.
with open('../data/csgo_df.pkl', 'rb') as raw_file:
    df_raw = pickle.load(raw_file)

In [3]:
# NOTE(review): `player_count` is not referenced by any other cell shown in this
# notebook, and the same CSV is read again later into `df_player`.
player_count = pd.read_csv('../data/csgo.csv')

In [4]:
# Work on a copy so `df_raw` keeps the original item names; they are copied back
# from it once the feature columns have been built.
df = df_raw.copy()

### Fill missing days with 0s for every item

#### List of items:

In [5]:
# Every distinct item name present in the data.
items = df['item_name'].unique()

#### Possible parentheticals:

In [6]:
# For each name containing '(', keep everything from its last '(' onward.
parentheticals = {name[name.rfind('('):] for name in items if '(' in name}
parentheticals

{'(Battle Green)',
 '(Battle-Scarred)',
 '(Bazooka Pink)',
 '(Blood Red)',
 '(Blue)',
 '(Brick Red)',
 '(Cash Green)',
 '(Desert Amber)',
 '(Dust Brown)',
 '(Factory New)',
 '(Field-Tested)',
 '(Foil)',
 '(Foil)  | MLG Columbus 2016',
 '(Foil) | Atlanta 2017',
 '(Foil) | Boston 2018',
 '(Foil) | Cluj-Napoca 2015',
 '(Foil) | Cologne 2015',
 '(Foil) | Cologne 2016',
 '(Foil) | DreamHack 2014',
 '(Foil) | Katowice 2014',
 '(Foil) | Katowice 2015',
 '(Foil) | Krakow 2017',
 '(Foil) | MLG Columbus 2016',
 '(Frog Green)',
 '(Gold) | Boston 2018',
 '(Gold) | Krakow 2017',
 '(Holo)',
 '(Holo) | Atlanta 2017',
 '(Holo) | Boston 2018',
 '(Holo) | Cologne 2014',
 '(Holo) | Cologne 2016',
 '(Holo) | DreamHack 2014',
 '(Holo) | Katowice 2014',
 '(Holo) | Katowice 2015',
 '(Holo) | Krakow 2017',
 '(Holo) | MLG Columbus 2016',
 '(Holo-Foil)',
 '(Jungle Green)',
 '(Minimal Wear)',
 '(Monarch Blue)',
 '(Monster Purple)',
 '(Princess Pink)',
 '(Red)',
 '(SWAT Blue)',
 '(Shark White)',
 '(Tiger Orange)'

#### The parentheticals cover colors, quality, and foil type; events come after a '|' (and likely stand apart from any parenthetical when the foil tag is missing)

#### `itertools.compress` is like boolean masking for lists

In [7]:
# Boolean flag per entry of `items`; compress() keeps the entries flagged True.
has_double_space = ['(Foil)  | MLG Columbus 2016' in name for name in items]
list(compress(items, has_double_space))

['Sticker | AdreN (Foil)  | MLG Columbus 2016']

In [8]:
# Intended spelling of that name: a single space before the second '|'.
'Sticker | AdreN (Foil) | MLG Columbus 2016'

'Sticker | AdreN (Foil) | MLG Columbus 2016'

In [9]:
# Swap the double-space spelling for the single-space one wherever it occurs.
misspelled = 'Sticker | AdreN (Foil)  | MLG Columbus 2016'
corrected = 'Sticker | AdreN (Foil) | MLG Columbus 2016'
df = df.replace(to_replace=misspelled, value=corrected)

#### Removed the extra space in that string

#### Looking at the items without parentheses

In [10]:
# True for rows whose item name contains an opening parenthesis.
df['parenthetical'] = df['item_name'].str.contains('(', regex=False)

In [11]:
# Number of distinct item names that carry no parenthetical.
df.loc[~df['parenthetical'], 'item_name'].nunique()

1450

1450 unique items left to categorize

In [12]:
df.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True


In [13]:
# Weapon names used to recognise gun skins, kept in ASCII-sorted order.
gun_list = [
    'AK-47', 'AUG', 'AWP', 'CZ75-Auto', 'Desert Eagle', 'Dual Berettas',
    'FAMAS', 'Five-SeveN', 'G3SG1', 'Galil AR', 'Glock-18', 'M249',
    'M4A1-S', 'M4A4', 'MAC-10', 'MAG-7', 'MP7', 'MP9', 'Negev', 'Nova',
    'P2000', 'P250', 'P90', 'PP-Bizon', 'R8 Revolver', 'SCAR-20', 'SG 553',
    'SSG 08', 'Sawed-Off', 'Tec-9', 'UMP-45', 'USP-S', 'XM1014',
]

#### Add guns column

In [14]:
def _mentions_gun(item_name):
    """Return True when any entry of `gun_list` occurs as a substring of the name."""
    return any(gun in item_name for gun in gun_list)


df['gun_bool'] = [_mentions_gun(name) for name in df['item_name']]

In [15]:
# Text before the first '|' (minus its final character) for every gun item name.
gun_item_names = df.loc[df['gun_bool'], 'item_name'].unique()
sorted({name.split('|')[0][:-1] for name in gun_item_names})

['AK-47',
 'AUG',
 'AWP',
 'CZ75-Auto',
 'Desert Eagle',
 'Dual Berettas',
 'FAMAS',
 'Five-SeveN',
 'G3SG1',
 'Galil AR',
 'Glock-18',
 'M249',
 'M4A1-S',
 'M4A4',
 'MAC-10',
 'MAG-7',
 'MP7',
 'MP9',
 'Negev',
 'Nova',
 'P2000',
 'P250',
 'P90',
 'PP-Bizon',
 'R8 Revolver',
 'SCAR-20',
 'SG 553',
 'SSG 08',
 'Sawed-Off',
 'Souvenir AK-47',
 'Souvenir AUG',
 'Souvenir AWP',
 'Souvenir CZ75-Auto',
 'Souvenir Desert Eagle',
 'Souvenir Dual Berettas',
 'Souvenir FAMAS',
 'Souvenir Five-SeveN',
 'Souvenir G3SG1',
 'Souvenir Galil AR',
 'Souvenir Glock-18',
 'Souvenir M249',
 'Souvenir M4A1-S',
 'Souvenir M4A4',
 'Souvenir MAC-10',
 'Souvenir MAG-7',
 'Souvenir MP7',
 'Souvenir MP9',
 'Souvenir Negev',
 'Souvenir Nova',
 'Souvenir P2000',
 'Souvenir P250',
 'Souvenir P90',
 'Souvenir PP-Bizon',
 'Souvenir R8 Revolver',
 'Souvenir SCAR-20',
 'Souvenir SG 553',
 'Souvenir SSG 08',
 'Souvenir Sawed-Off',
 'Souvenir Tec-9',
 'Souvenir UMP-45',
 'Souvenir USP-S',
 'Souvenir XM1014',
 'StatTrak™

#### Separate StatTraks and Souvenirs

In [16]:
# Flag names that begin with 'Stat'.
df['StatTrak'] = [name.startswith('Stat') for name in df['item_name']]

In [17]:
# Drop the 'StatTrak™ ' prefix from the flagged names, keeping what follows it.
stattrak_prefix = 'StatTrak™ '
stattrak_names = df.loc[df['StatTrak'], 'item_name']
df.loc[df['StatTrak'], 'item_name'] = [name.split(stattrak_prefix)[1] for name in stattrak_names]

In [18]:
df[df.StatTrak].head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak
8629,730,1471583000.0,P90 | Grim (Field-Tested),7.559,153,18 August 2016,True,True,True
8630,730,1471669000.0,P90 | Grim (Field-Tested),5.642,179,18 August 2016,True,True,True
8631,730,1471756000.0,P90 | Grim (Field-Tested),4.711,150,18 August 2016,True,True,True
8632,730,1471842000.0,P90 | Grim (Field-Tested),4.308,140,18 August 2016,True,True,True
8633,730,1471928000.0,P90 | Grim (Field-Tested),3.919,138,18 August 2016,True,True,True


#### Separate Souvenir

In [19]:
# Flag names that begin with 'Souv'.
df['souvenir'] = [name.startswith('Souv') for name in df['item_name']]

In [20]:
# Drop the 'Souvenir ' prefix from the flagged names, keeping what follows it.
souvenir_prefix = 'Souvenir '
souvenir_names = df.loc[df['souvenir'], 'item_name']
df.loc[df['souvenir'], 'item_name'] = [name.split(souvenir_prefix)[1] for name in souvenir_names]

In [21]:
df[df.souvenir].head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir
195393,730,1416722000.0,P250 | Nuclear Threat (Minimal Wear),234.66,1,14 August 2013,True,True,False,True
195394,730,1419746000.0,P250 | Nuclear Threat (Minimal Wear),399.996,1,14 August 2013,True,True,False,True
195395,730,1420178000.0,P250 | Nuclear Threat (Minimal Wear),399.996,1,14 August 2013,True,True,False,True
195396,730,1421993000.0,P250 | Nuclear Threat (Minimal Wear),302.38,1,14 August 2013,True,True,False,True
195397,730,1426396000.0,P250 | Nuclear Threat (Minimal Wear),139.219,2,14 August 2013,True,True,False,True


#### Separate Gun, Skin, quality

In [22]:
df.loc[799720].date

1513922400.0

In [23]:
# Create the three descriptive columns as all-NaN placeholders.
for new_col in ('gun_type', 'skin', 'quality'):
    df[new_col] = np.nan

In [24]:
df.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir,gun_type,skin,quality
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True,True,False,False,,,
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True,True,False,False,,,
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True,True,False,False,,,
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True,True,False,False,,,
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True,True,False,False,,,


In [25]:
# Gun type is the text before the first ' |' of each gun item name.
gun_rows = df.loc[df['gun_bool'], 'item_name']
df.loc[df['gun_bool'], 'gun_type'] = [name.split(' |')[0] for name in gun_rows]

In [26]:
def _skin_name(item_name):
    """Return the skin part of '<gun> | <skin> (<quality>)' with no padding.

    The text after the first '| ' is cut at its last '(' and stripped, so the
    space that precedes the parenthesis is not kept in the result. A name with
    no '(' is returned whole (stripped) rather than losing its final character.
    Raises IndexError when the name contains no '| ' separator.
    """
    after_gun = item_name.split('| ')[1]
    paren_at = after_gun.rfind('(')
    if paren_at >= 0:
        after_gun = after_gun[:paren_at]
    return after_gun.strip()


df.loc[df.gun_bool, 'skin'] = [_skin_name(x) for x in df.item_name[df.gun_bool]]

In [27]:
# Quality is the text after the last '(' with the final character removed.
quality_source = df.loc[df['gun_bool'], 'item_name']
df.loc[df['gun_bool'], 'quality'] = [name.rsplit('(', 1)[-1][:-1] for name in quality_source]

In [28]:
df.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir,gun_type,skin,quality
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred


#### Keys

In [29]:
# Flag names containing the substring 'Key'.
df['keys'] = ['Key' in name for name in df['item_name']]

#### Capsules

In [30]:
# A capsule row mentions 'Capsule' and has not already been flagged as a key.
mentions_capsule = df['item_name'].str.contains('Capsule', regex=False)
df['capsules'] = mentions_capsule & ~df['keys']

#### Stickers

In [31]:
# A sticker row mentions 'Sticker' and is neither a key nor a capsule.
mentions_sticker = df['item_name'].str.contains('Sticker', regex=False)
df['stickers'] = mentions_sticker & ~df['keys'] & ~df['capsules']

In [32]:
df.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False


#### Tournaments

In [33]:
# Tournament names written year-first ('<year> <name>').
tournaments = [
    '2018 ELEAGUE Boston', '2017 PGL Krakow', '2015 ESL One Cologne',
    '2017 ELEAGUE Atlanta', '2016 MLG Columbus', '2016 ESL One Cologne',
    '2015 DreamHack Cluj-Napoca', '2015 ESL One Katowice', '2014 ESL One Cologne',
    '2014 EMS One Katowice', '2014 DreamHack Winter', '2013 DreamHack Winter',
]

In [34]:
# Move the leading year to the end ('2018 ELEAGUE Boston' -> 'ELEAGUE Boston 2018').
# Entries that no longer start with four digits are left alone, so re-running
# this cell does not scramble names that were already converted.
tournaments = [
    name[5:] + ' ' + name[:4] if name[:4].isdigit() else name
    for name in tournaments
]

In [35]:
tournaments

['ELEAGUE Boston 2018',
 'PGL Krakow 2017',
 'ESL One Cologne 2015',
 'ELEAGUE Atlanta 2017',
 'MLG Columbus 2016',
 'ESL One Cologne 2016',
 'DreamHack Cluj-Napoca 2015',
 'ESL One Katowice 2015',
 'ESL One Cologne 2014',
 'EMS One Katowice 2014',
 'DreamHack Winter 2014',
 'DreamHack Winter 2013']

In [36]:
def _names_tournament(item_name):
    """Return True when any entry of `tournaments` occurs as a substring of the name."""
    return any(event in item_name for event in tournaments)


# NOTE(review): several sticker names displayed earlier end in short forms such
# as '| Boston 2018' or '| Krakow 2017', which do not contain the longer names
# held in `tournaments`; confirm whether those rows are meant to be flagged.
df['event_item'] = [_names_tournament(name) for name in df['item_name']]

In [51]:
# Text after the last '|' of each event item name (the whole name if it has no '|').
event_names = df.loc[df['event_item'], 'item_name']
{name[name.rfind('|') + 1:] for name in event_names}

{' DreamHack Winter 2014',
 ' DreamHack Winter 2014 (Foil)',
 ' ESL One Cologne 2014 (Blue)',
 ' ESL One Cologne 2014 (Red)',
 ' MLG Columbus 2016',
 'DreamHack Cluj-Napoca 2015 Cache Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Challengers (Foil)',
 'DreamHack Cluj-Napoca 2015 Cobblestone Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Dust II Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Inferno Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Legends (Foil)',
 'DreamHack Cluj-Napoca 2015 Mirage Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Overpass Souvenir Package',
 'DreamHack Cluj-Napoca 2015 Train Souvenir Package',
 'ESL One Cologne 2014 Cache Souvenir Package',
 'ESL One Cologne 2014 Challengers',
 'ESL One Cologne 2014 Cobblestone Souvenir Package',
 'ESL One Cologne 2014 Dust II Souvenir Package',
 'ESL One Cologne 2014 Inferno Souvenir Package',
 'ESL One Cologne 2014 Legends',
 'ESL One Cologne 2014 Mirage Souvenir Package',
 'ESL One Cologne 2014 Nuke Souvenir Packa

In [49]:
# Slice one event item name from its last '|'. When the name has no '|', rfind
# returns -1 and the slice yields only the final character.
sample_event_name = df[df['event_item']].item_name.iloc[100000]
sample_event_name[sample_event_name.rfind('|'):]

'e'

In [50]:
df[df['event_item']].item_name.iloc[100000]

'ESL One Katowice 2015 Overpass Souvenir Package'

#### Let's leave everything else as 'Other' for now

#### Add player counts

In [39]:
# NOTE(review): this re-reads the CSV already loaded into `player_count` near the
# top of the notebook; `df_player` is not used by any other cell shown here.
df_player = pd.read_csv('../data/csgo.csv')

#### Add release dates

In [40]:
# Open the MongoDB collection that stores item release dates.
# MongoClient() is called with no arguments, so pymongo's default connection
# settings apply. NOTE(review): this import sits mid-notebook rather than in the
# import cell at the top.
from pymongo import MongoClient
client = MongoClient()
db = client['steam_capstone']
collection = db['item_release_dates']

In [41]:
# Materialise every document in the collection as a list.
# NOTE(review): the only consumer of `releases` visible here is commented out.
releases = list(collection.find())

In [42]:
# NOTE(review): disabled draft. It refers to `csdf`, a name that is not defined
# in any cell shown here, so it could not run as written.
# for entry in releases:
#     for item in entry['items']:
#         mask = [item + ' (' in x for x in csdf['item_name']]
#         csdf.loc[mask, 'release_date'] = entry['release_date']

#### Add readable and epoch date for each date feature

#### Put item_name back together

In [43]:
# Restore the full item names (with their StatTrak™/Souvenir prefixes) from the
# raw frame. The raw names still contain the double-space spelling of the AdreN
# MLG Columbus sticker, so the same correction is applied here; otherwise the
# restore would silently undo it.
df['item_name'] = df_raw['item_name'].replace(
    'Sticker | AdreN (Foil)  | MLG Columbus 2016',
    'Sticker | AdreN (Foil) | MLG Columbus 2016',
)
df = df.reset_index(drop=True)

#### Add days since release for time shifting

In [44]:
# Number each item's rows 0..n-1 in their current order. cumcount() returns a
# single Series, whereas a whole-frame transform returns one column per input
# column and cannot be assigned to a single column.
# NOTE(review): this is an observation counter; it equals elapsed days only if
# every item has exactly one row per consecutive day — confirm against the data.
df['days_since_release'] = df.groupby('item_name').cumcount()

In [45]:
df.head()

Unnamed: 0,index,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers,event_item,days_since_release
0,0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,0
1,1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,1
2,2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,2
3,3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,3
4,4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,4


In [52]:
# Persist the feature frame for later cells and notebooks.
with open('../data/csgo_features.pkl', 'wb') as features_file:
    pickle.dump(df, features_file)

#### Now I need scripts to remove items that sell for $0.03 and have low quantity

# Moving everything over from 'simple_csgo_df.ipynb'

In [1]:
import pickle
import pandas as pd
import numpy as np
import datetime
import time

In [3]:
# Reload the raw pickle; from here on `df` refers to this fresh frame.
with open('../data/csgo_df.pkl', 'rb') as raw_file:
    df = pickle.load(raw_file)

In [4]:
df.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016


In [5]:
# Remove the two columns that the small frame does not keep.
df = df.drop(['app', 'release_date'], axis=1)

In [6]:
# Number each item's rows 0..n-1 in their current order. cumcount() returns a
# single Series, whereas a whole-frame transform returns one column per input
# column and cannot be assigned to a single column.
# NOTE(review): this is an observation counter; it equals elapsed days only if
# every item has exactly one row per consecutive day — confirm against the data.
df['days_since_release'] = df.groupby('item_name').cumcount()

In [7]:
# Estimated release = the earliest `date` observed for each item name.
df['est_release'] = df['date'].groupby(df['item_name']).transform('min')

In [8]:
df.head()

Unnamed: 0,date,item_name,median_sell_price,quantity,days_since_release,est_release
0,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,0,1471583000.0
1,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,1,1471583000.0
2,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,2,1471583000.0
3,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,3,1471583000.0
4,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,4,1471583000.0


In [9]:
# Save the small frame.
with open('../data/cs_df_S.pkl', 'wb') as small_file:
    pickle.dump(df, small_file)

In [11]:
# Load the feature frame written earlier as the starting point for the medium frame.
with open('../data/csgo_features.pkl', 'rb') as features_file:
    df_medium = pickle.load(features_file)

In [12]:
df_medium.head()

Unnamed: 0,app,date,item_name,median_sell_price,quantity,release_date,parenthetical,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers,event_item,days_since_release
0,730,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,0
1,730,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,1
2,730,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,2
3,730,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,3
4,730,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,18 August 2016,True,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,False,4


In [13]:
# Remove the columns that the medium frame does not keep.
unused_cols = ['app', 'release_date', 'parenthetical', 'event_item']
df_medium = df_medium.drop(unused_cols, axis=1)

In [14]:
df_medium.head()

Unnamed: 0,date,item_name,median_sell_price,quantity,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers,days_since_release
0,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,0
1,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,1
2,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,2
3,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,3
4,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,4


In [15]:
# Estimated release = the earliest `date` observed for each item name.
df_medium['est_release'] = df_medium['date'].groupby(df_medium['item_name']).transform('min')

In [16]:
df_medium.head()

Unnamed: 0,date,item_name,median_sell_price,quantity,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers,days_since_release,est_release
0,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,0,1471583000.0
1,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,1,1471583000.0
2,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,2,1471583000.0
3,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,3,1471583000.0
4,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,4,1471583000.0


In [27]:
def _day_label(epoch_seconds):
    """Format an epoch timestamp like '19 Aug 2016'.

    fromtimestamp() is called without a tz argument, so the calendar day is
    taken in the local time zone of the machine running the notebook.
    """
    return datetime.datetime.fromtimestamp(epoch_seconds).strftime('%d %b %Y')


df_medium['date_str'] = [_day_label(t) for t in df_medium['date']]
df_medium['est_release_str'] = [_day_label(t) for t in df_medium['est_release']]

In [28]:
df_medium.head()

Unnamed: 0,date,item_name,median_sell_price,quantity,gun_bool,StatTrak,souvenir,gun_type,skin,quality,keys,capsules,stickers,days_since_release,est_release,date_str,est_release_str
0,1471583000.0,Glock-18 | Weasel (Battle-Scarred),6.193,135,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,0,1471583000.0,19 Aug 2016,19 Aug 2016
1,1471669000.0,Glock-18 | Weasel (Battle-Scarred),5.103,166,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,1,1471583000.0,20 Aug 2016,19 Aug 2016
2,1471756000.0,Glock-18 | Weasel (Battle-Scarred),4.721,142,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,2,1471583000.0,21 Aug 2016,19 Aug 2016
3,1471842000.0,Glock-18 | Weasel (Battle-Scarred),4.302,98,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,3,1471583000.0,22 Aug 2016,19 Aug 2016
4,1471928000.0,Glock-18 | Weasel (Battle-Scarred),3.898,125,True,False,False,Glock-18,Weasel,Battle-Scarred,False,False,False,4,1471583000.0,23 Aug 2016,19 Aug 2016


In [31]:
df_medium.columns

Index(['date', 'item_name', 'median_sell_price', 'quantity', 'gun_bool',
       'StatTrak', 'souvenir', 'gun_type', 'skin', 'quality', 'keys',
       'capsules', 'stickers', 'days_since_release', 'est_release', 'date_str',
       'est_release_str'],
      dtype='object')

In [32]:
# Column order for the medium frame: readable fields first, raw epochs last.
cols = [
    'item_name',
    'date_str',
    'median_sell_price',
    'quantity',
    'est_release_str',
    'days_since_release',
    'gun_type',
    'skin',
    'quality',
    'gun_bool',
    'StatTrak',
    'souvenir',
    'keys',
    'capsules',
    'stickers',
    'date',
    'est_release',
]

In [35]:
# Reorder the columns. The explicit copy makes the result an independent frame,
# so adding columns to it afterwards does not raise SettingWithCopyWarning.
df_medium = df_medium[cols].copy()

In [39]:
# Convert each epoch to its local-time calendar day, stored as numpy datetime64.
local_days = (datetime.datetime.fromtimestamp(epoch).date() for epoch in df_medium['date'])
df_medium['timestamp'] = [np.datetime64(day) for day in local_days]

In [40]:
# Save the medium frame.
with open('../data/cs_df_M.pkl', 'wb') as medium_file:
    pickle.dump(df_medium, medium_file)

In [2]:
# Sanity check of the epoch -> datetime64 day conversion on a single value.
# fromtimestamp() uses the local time zone, so the day shown can differ by machine.
np.datetime64(datetime.datetime.fromtimestamp(1.471842e+09).date())

numpy.datetime64('2016-08-22')