In [1]:
pip install --upgrade numexpr

Collecting numexpr
  Downloading numexpr-2.8.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading numexpr-2.8.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (384 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m384.1/384.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: numexpr
  Attempting uninstall: numexpr
    Found existing installation: numexpr 2.7.3
    Uninstalling numexpr-2.7.3:
      Successfully uninstalled numexpr-2.7.3
Successfully installed numexpr-2.8.7
Note: you may need to restart the kernel to use updated packages.


In [53]:
import pandas as pd
import numpy as np
import os
import boto3
import sagemaker
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

In [54]:
# Reading the datasets
buyer_dataset = pd.read_csv('./noon_perfumes_buyer_dataset.csv')
noon_perfumes = pd.read_csv('./noon_perfumes_dataset.csv')
test_dataset = pd.read_csv('./test.csv')

In [55]:
buyer_dataset = buyer_dataset.dropna(axis=0)
buyer_dataset = buyer_dataset[buyer_dataset['preference_base_note'] != '']
buyer_dataset = buyer_dataset[buyer_dataset['preference_middle_note'] != '']

# 성별 원핫인코딩
gender_encoding = {'Men': 0, 'Women': 1}
buyer_dataset['buyer_gender'] = buyer_dataset['buyer_gender'].map(gender_encoding)

# brand와 name을 합쳐 하나의 feature로 통일
buyer_dataset['type'] = buyer_dataset['brand'] + '-' + buyer_dataset['name']
buyer_dataset = buyer_dataset.drop(['brand', 'name'], axis=1)
buyer_dataset = buyer_dataset.reset_index(drop=True)

## [one-hot encoding] preference_base_note & preference_middle_note
def get_buyer_notes_type_set(dataset):
  note_set = set()
  for i in range(dataset.shape[0]):
    note_set.add(buyer_dataset.loc[i]['preference_base_note'])
    note_set.add(buyer_dataset.loc[i]['preference_middle_note'])
  return note_set

def change_categorical_notes_to_encoding(dataset):
  for i in range(dataset.shape[0]):
    dataset.at[i, dataset.loc[i]['preference_base_note']] = 1
    dataset.at[i, dataset.loc[i]['preference_middle_note']] = 1
  dataset = dataset.drop(['preference_base_note'], axis=1)
  dataset = dataset.drop(['preference_middle_note'], axis=1)


note_set = get_buyer_notes_type_set(buyer_dataset)
print(note_set)

for note in note_set:
  buyer_dataset = pd.concat([buyer_dataset, pd.DataFrame({note: [0 for i in range(buyer_dataset.shape[0])]})], axis=1)

change_categorical_notes_to_encoding(buyer_dataset)



print(f"Rows: {buyer_dataset.shape[0]}\nColumns: {buyer_dataset.shape[1]}")
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(buyer_dataset.head())

buyer_dataset.drop(['preference_base_note', 'preference_middle_note'], axis=1, inplace=True)


# type 라벨 인코더
label_encoder = LabelEncoder()
buyer_dataset['type_encoded'] = label_encoder.fit_transform(buyer_dataset['type'])

# 매핑 type 이름 - 라벨
type_encoding_dict = dict(zip(buyer_dataset['type'], buyer_dataset['type_encoded']))


{'Ambery', 'RedWoods', 'Almodmilk', 'Tob**co', 'Distinct', 'Sugar', 'Citric', 'Rice', 'Moroccanjasmine', 'OrangeBlossom', 'musk', 'Litchi', 'OrangeFlowerAbsolute', 'Sunflower', 'Gardenias', 'RangeBlossom', 'lily-of-the-valley', 'Velvet', 'PeruBalsam', 'RangoonCreeper', 'BurntAmber', 'Maple', 'LiquidAmber', 'Geranium', 'cinnamon', 'CreamAccords', 'Fishing', 'Ambernotes', 'IrisAmbergris', 'mimosa', 'Haitianvetiver', 'AmberNuancedClarySage', 'JasmineTea', 'Cherry', 'wood', 'Cybet', 'Essencesofrare', 'BalsamVetiver', 'GreenApple', 'SuedeAccord', 'CrispGreenApple', 'EffervescentCitrus.', 'Bergamot', 'Whiteflowers', 'Woodsy', 'CedarAccords', 'Lily-Of-The-valley', 'WildJasmine', 'GuaiacTree', 'Peppery', 'Sage', 'MilkySandalwood', 'JasmineInAnAllianceWithHibiscusBlossom', 'IndianJasmine', 'WhiteWoods', 'GreenTea', 'Patchoulie', 'Cassis', 'BroomFlower)', 'OakMoss', 'Plum', 'MexicanTuberose', 'CeylonTea', 'Ylang-Ylan', 'orangeblossom', 'Bran', 'Coriander', 'CardamomWithBlackTeaLeaves', 'OrangeTr

Rows: 4961
Columns: 825


Unnamed: 0,preference_base_note,preference_middle_note,satisfaction,buyer_gender,buyer_age,user_id,type,Ambery,RedWoods,Almodmilk,Tob**co,Distinct,Sugar,Citric,Rice,Moroccanjasmine,OrangeBlossom,musk,Litchi,OrangeFlowerAbsolute,Sunflower,Gardenias,RangeBlossom,lily-of-the-valley,Velvet,PeruBalsam,RangoonCreeper,BurntAmber,Maple,LiquidAmber,Geranium,cinnamon,CreamAccords,Fishing,Ambernotes,IrisAmbergris,mimosa,Haitianvetiver,AmberNuancedClarySage,JasmineTea,Cherry,wood,Cybet,Essencesofrare,BalsamVetiver,GreenApple,SuedeAccord,CrispGreenApple,EffervescentCitrus.,Bergamot,Whiteflowers,Woodsy,CedarAccords,Lily-Of-The-valley,WildJasmine,GuaiacTree,Peppery,Sage,MilkySandalwood,JasmineInAnAllianceWithHibiscusBlossom,IndianJasmine,WhiteWoods,GreenTea,Patchoulie,Cassis,BroomFlower),OakMoss,Plum,MexicanTuberose,CeylonTea,Ylang-Ylan,orangeblossom,Bran,Coriander,CardamomWithBlackTeaLeaves,OrangeTreeBlossom,CandiedFruits,LignumVitae,Narcissus,LilyOfTheValley,AromaticCedarwood,Clementine,WildHeather,PineApple,MirabellePlum,UnexpectedCarrot,Hay,CandiedRose,Jasmin,caraway,PatchouliHeart,Salt,patchouli,Tobacco,Almond,Damascus,RedApple,Haitian,Vanille,Pink,Rosewood,Bluebell,Passionfruit,BrownSugarAccord,AmberMilk,AmbregrisAccord,pinetree,CelerySeeds,Laurels,RareWood,Hedione,Ginger,WhiteMuguet,RoseHip,Strawberries,IntensivelyClean,Olibanum,PlamTree,BlackOpiumCoffee,jasmine,vetiver,Tubereuse,marigold,ElegantJasmine,TailRose,Chestnut,ylang-ylang,WoodyNotes,GaïacWood,PinkPepper,woody-amber,Saffron,DakotaLilies,TeaRose,Ceder,Mahogany,Tuberose,VioletSandalwood,PowderyVanilla,Ylang,ExoticWoods,SambacJasmineAbsolute,BourbonVetiver,SpicyNotes,RedCedarWhiteMusk,Labdanum,Almonds,Lily-Of-The-Valley,cedar,Orangeblossom,WhiteWood,Davana,Lavender,LuxuriousNotesofWhiskey,DriedFruit,geranium,WarmSandalwood.,Strawberry,Lily,JasmineTrace,AfricanViolet,Cloves,WoodsyNotes,CottonCandy,GaiacWood,Patchouli,cashmereWood,SensualAddictionOfTheTonkaBeanNote,PowderyIris,Pelargonium,PatchouliLeaf,cardamom,RedFruits,Pear,RoastedCocoa,LotusFlower,ArabicJasmine,AfricanOrangeFlower,bayleaves,Clove,Basil,Peony,tuberose,Honeysuckle,RedRose,Maninka,TropicalFruits,BayLeaf,Apple,RedPepper,YellowPlum,ChineseJasmine,Sensual,FreesiaFlowers,Reseda,Oudwood,Cumin,VanillaPods,ChampacaFlower,Marine,Amber-like-notes,Tonka,Agarwood(oud),AromaticScents,WhiteFlorals,Pacsuli,Greenapple,Masculine,bakhoor,LuminousOrangeBlossom,Resin,AustralianSandalwood,WarmSpicesOfWildAromaticBlackSesame,Creamy,Fern,Magnalio,BrazilianRedwood,FloralPetal,VanillaAbsolute,Mexicanchocolate,DamaskRose,Powdery,Orchid,CherokeeRose,WoodyAmber,sichuanpepper,Lemon,Lavenday,spicynotes,whiteFennel,oakmoss,AfricanGeranium,GuaiacWood,PineTree,SuedeMuskSandalwoodCashmeran,Subtle,Cuddly,LouroAmareloWood,StarApple,WateryNotes,MagnoliaPetals,Fruity,Styrax,CaribbeanMagnolia,MuskWood,CashereWoods,freesia,Jasmine,Bamboo,GreenNotes,CopperAccords,Galbanum,Vetyver,sandalwood,SichaunPepper,TeakWood,Nectarine,ClementineZest,Cassia,Stephanotis,balsamic,WhiteFreesia,SensualBenzoin,HotGinger,Vanillaorchid,FloralSpicy,Magnolia,GoldenWoodsMusk,Opoponax,HaitianVetiver,Moss,Leathernote,LemonBlossom,PacoRabanneOlympea,JuniperBerries,Flowers,WhiteJasmine,AnimalisticFloral,HoneySuckle,tonkabeans,GingerLily,StarAnise,OrangeFlower,OakmossVetiver,HaitiVetiver,DanmaskRose,woodynotes,GreenGrass,JasmineAbsolute,WhiteChocolateOrchid,Spices,AmberFat,Elemi,SaltyAmber,JasminePetals,LilyoftheValley,VioletLeaf,Water,CacaoPod,VanillaBean,BourbonVanilla,Lychee,Cedar,Chocolate,BlondWood,Musky,Oriental,Juniper,Roses,LemonTree,BlackberryAreSoftenedWithDelicateViolet,lilyofthevalley,softmusk,SicilianLemon,RedCurrantBlossom,MustyOud,NeroliAbsolute,MuskyWoody,Watermelon,BlackPeony,AppleBlossom,DewyEnglishRoses,BulgarianRoseAbsolute,DelicateMuskySensualWoods,Aldehyde,AlmondBlossom,Lily-of-the-valley,SkinAccord,mint,IndianSambacJasmine,PinkFreesia,leather,VirginiaCedar,BlueCoralAquaspaceAccord,Apricot,WildStrawberry,Suede,Kiwano,MauritanianRedPepper,Coconut,Cashmirwood,BlueHyacinth,AtlasCedar,Orcanox,Toacco,juniperberries,Raspberry,Woodsytransparentwithcoffee,BlackBasil,ChilledVodka,Fruity-Gourmand,OrangeBlossomAbsolute,DryWoods.,Carambola,WaterJasmine,JasmineSambac,SpicyNotesOfCarnation,SilverSage,Praline,TonkaBean,Agarwood,PreciousTobacco,Mint,PalisanderRosewood,rose,WarmWoods,Cistus,LilyofTheValley,InokiWood,guaiacwood,Papyrus,TuberRose,Musks,Leathernotes,GrannySmithApple,LovelyVanilla,Leather,Milk,iris,Lemongrass,Georgywood,OrrisRoot,Cashmeran,Calone,bergamot,PineTreeNeedles,LusciousNarcissus,GrapefruitZests,LavenderSage,VanillaPod,Amaryllis,Teakwood,BenzoinTolu,Turkish,RaspberryLeaf,FirResin,CrispGreenNotes,tonkabean,WildHyacinth,SouthAmericanPimento,Chamomile,DryAmber,WoodyAccords,Musk.,RedBerries,WhiteAmber,Honey,YlangYlang,lily,liliofthevalley,Greenleavesoflily,Myrrh,Flower,Wildviolet,Majorjam,Driftwood,AmberWoods,ChineseRose,PeachNectar,Sandalwod,Seanotes,Pepper,CedarWood,narcissus,Pimento,CoralJasmine,DelicateMuskFragrance,pepper,TiareFlower,Mimosa,ToluBalsam,AgarwoodSmoke,BirchLeaf,Fir,Floral,carrot,Quince,Cashmere,Plants,LaotianBenzoin,ViolacioccaFlower,AccordsofWildStrawberry,Ambergris,petitgrain,BlackOrchid,Ylang-Ylang,RoseWater,Wisteria,ChineseCedar,Tea,Woods,Cypress,Rose,FloralNotes,IndonesianPatchouliEssence,TurkishRoseAbsolute,Earthy,Grenadine,Cardamom,tobacco.,incensetouches,OrangeBlossoms,FrangipaniFlower,Marshmallow,violet,Muguet,WildRose,Violet,Oakmoss,Juniperberry,ArumLily,TanzanianBlackPepper,BlackPepper,Agar,WoodyLeather,WaterNotes,OrangeBlossomExtract,elegantlilyflower,AMixOfSandalwood,WhitePepper,Cocoa,Bean,Melon,CocaFlower,CrystalMoss,Spicy,Frangipani,RosePetalAccord,LuidSandalwood,RoseTuberose,Hibiscus,SweetJasmine,Kiwi,Rosepetals,Beeswax,BlackCurrant,OzonicTonesWithGeranium,Civet,Verbena,KingWood,SichuanPepper,vanilla,LemonWood.,diamondmusk,Olivewood,Anise,ClarySage,WhiteHeliotrope,LotusBlossom,WhiteRose,Heliotropine,Incense,DelightfullySweetYlang-Ylang,Blackberry,Lavandin,Woodsy-MuskBase,BlondeLeatherPatchouliWhiteWoodsAmber,Leathery,BrazilianRosewood,DarkChocolate,LivelyBlackBasil,AlmondMilk,Leatherwood,honey,LightWood,BlackAmber,WaterLily,CoolEucalyptus,CreamyVanilla,Peach,Rosemary,Coffee,Balsamic,SunflowerSeed,LaosBenzoin,Daffodil,ylang,CreamyRosewood,gardenialilyofthevalley,SaltedVanilla,LemdDeSantal,Petitgrain,MoroccanJasmine,SweetWood,LivingOsmanthusFlower,TurkishRoseGardeniaCyclamen,IntoxicatingSeductressRose,CasablancaLily,RosePetals,Cashmerean,jasmin,Oud,MandarinOrange,Platanus,FrenchLabdanum,TurkishRose,Woodynotes,BurningOud,Osmanthus,VintageVanilla,Orris,CoconutSalt,LivingTempleIncense,orrisroot,Warm,GoldenQuince,Shepherd,heliotrope,apple,OsmanthusFlower,LebaneseBlueCedar,Gardenia,gardenia,RoseOrangeBlossom,JasminSambac,OliveTree,AmberyWoods,VioletRoot,BourbonPepper,WhiteFlowers,ButterflyViolets,VanillaOrchid,EssenceOfBirch,MadagascarVanillaAbsolute,AmbretteSeeds,Blackcurrant,RoseDeMaiIndian,Rum,Africanorange,RaspberryVilolet,FloralAccent,Orange,Woody,SensualWoods,Mahonial,CitrusWood,Pine,RosedeMai,RoseGeranium,Pinetree,WoodyAccord,PalisanderRosemary,FleurDeLys,Night-BloomingJasmine,RoseHipRedCyclamen,bamboo,LabdanumRum,Roseabsolute,GuatemalanCardamom,Lilac,NeriumOleander,WoodyCashmeran,HibiscusSeed,GreyAmber,MadagascarVanilla,Akigalawood,DriedFruits.,RoseDamascenaEssence,cypress,carnation,Ambrofix,CentifoliaRose,CactusFlowers,Ambra,WhiteMusks,BlossomPeach,Amyris,Tarragon,TonkaBeans,CharismaticVirilityOfACedarwood,SliceOfApple,Plums,SeaNotes,PapyrusWood,amberwood,WarmWomanSkinAccord,FloralRhythmOfPeony,Birch,Heliotrope,Thyme,tarragon,TahitiTonka,Sandalwood,OrrangeBlossom,IndonesianPatchouli,Tonkabean,AcaciaWood,PeachBlossom,BalsaWood,ResinousCardamom,lotus,Exotic,GoldenAmber,LavenderExtract,Wood,Musk,Cactus,BlackMusk,FloralHarmonyofMagnolia,CottonFlower,CinnamonSpices,SmallCoconut,JasmineDry,Amber,Patchoulli,Guimave,SweetFruityRaspberryNuance,Ambroxan,Chrysanthemum,AlmondPowder,ambroxan,Cupcakes,GrapefruitBlossom,PressedSicilianLemon,CashmereWood,VanillaLeather,SilverMoss,Ylang-ylang,SpicySensuality-GreenPepper,NightQueenFlower,Neroli,MoepelAccord,GingerFlower,OzonicNotes,amber,Vetiver,LebaneseCedar,raspberry,moss,BourbonGeranium,VanillaFlowers,DarkPlum,MousseAustralian,SoftPeony,BlueOrrisAccord,HimalayanCedar,SensualMusk,EgyptianMusk,BalsamFir,IndianGinger,Lily-Of-The-ValleyRose,incense,OrangeBloosm,Lily-of-the-Valley,Animal,RedLily,Magnoli,Musk-like-notes,PinkHoneysuckle,WhiteSandalwood,leathertonkabean,Lamon,BlackViolet,SandalWood,VanillaPodsfromMadagascar,CreamyHoney,Vanilla,Licorice,NaturalRoseAbsolute,Carrot,PeonyTiare,AromaticAccords,OliveFlowerSpicedWithStarAnise,sage,Schijnhulst,DamascusPlum,Lily-of-thevalley,CreamyAmber,Whitecedar,WoodMoss,Caramel,exoticamethystfreesia,JacarandaWood,Freesia,VanillaStyrax,Cinnamon,Caraway,WhitePeach,Evernyl,Carnation,Hyacinth,HibiscusSeeds,BloodOrange,MoroccanRose,PearWood,PatchouliLeaves,PowderyNotes,MuskyNotes,AmberyWoody,NutmegOil,JoshuaTree,Cyclamen,EbonyWood,BulgarianRose,VioletLeaves,Coumarin,Muget,SkinMusk,pinkpepper,EarthyVetiver,Ebony,PineappleFlower,RoastedTonkaGrain,CaramelMusk,IndianPatchouli,Nutmeg,muguetpetals,RedCader,Oudh,Frankincense,TahitianTiareFlowers,Benzoin,RoseAbsolute,WhiteCedarExtract,SambacJasmine,Whiskey,SeaNotesMixedWithSpices,Pineapple,WormwoodCashmereWood,Mayrose,CuddlyAromas,labdanum,Cacao,WhiteMusk,ShavingSoap,RoseAbsolu,NaturalOudOil,benzoin,CashmereWoods,Agarwood(Oud),Cedarwood,PreciousMusk,MarineNotes,AgaveNectar,Hazelnut,CreamyMusk,Seaweed,saffron,Iris,Lotus,RooibosTeaExtract(redBushTea),AmberWood,CoconutAccords,RoastedAlmonds,Cranberries,drywoody-amber,lavender,ChineseOsmanthus,Amberwood,OliveBlossom,CashmirWood,VegetalAmber,aHintofFreshClementineZest,MassoiaWood,TahitianVetiver,oud,EvergreenCypress,Plumeria,OrrisConcrete,CitrusOrchards,FloralAmberyWoody,FlowersHerbShepherd,ParaguayanLignumVitae,IsoESuper,RippledSandAccord
0,Spices,Rose,5,1,30,115.0,Franck Olivier-Oud Vanille,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wood,SoftPeony,4,0,21,74.0,Genie Collection-London Burberry,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Musk,Vanilla,4,1,34,80.0,CARON-Pour Un Homme De Caron,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,BurntAmber,Sage,5,0,37,256.0,Calvin Klein-Euphoria Intense,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,OakMoss,Birch,4,0,29,70.0,DIESEL-Plus Plus Masculine,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [56]:
type_encoding_df = pd.DataFrame({'type': buyer_dataset['type'], 'type_encoded': buyer_dataset['type_encoded']})
type_encoding_df

Unnamed: 0,type,type_encoded
0,Franck Olivier-Oud Vanille,239
1,Genie Collection-London Burberry,281
2,CARON-Pour Un Homme De Caron,98
3,Calvin Klein-Euphoria Intense,139
4,DIESEL-Plus Plus Masculine,159
...,...,...
4956,MONTALE-Roses Musk,421
4957,Dior-Sauvage,189
4958,Dolce & Gabbana-The One,198
4959,Alina Corel-Monsieur Oud,35


In [57]:
X = buyer_dataset.drop(['type_encoded', 'type', 'user_id'], axis=1)
y = buyer_dataset['type_encoded'].astype('int')
dataset_columns = X.columns.tolist()

print(len(np.array(dataset_columns).tolist()))
print(X_test.shape)

821
(993, 821)


In [58]:
%store X
%store y
%store dataset_columns
%store type_encoding_df

Stored 'X' (DataFrame)
Stored 'y' (Series)
Stored 'dataset_columns' (list)
Stored 'type_encoding_df' (DataFrame)


In [59]:
# get current session region
session = boto3.session.Session()
region = session.region_name
print(f'currently in {region}')

currently in ap-northeast-2


In [60]:
# use the default sagemaker s3 bucket to store processed data
# here we figure out what that default bucket name is 
sagemaker_session = sagemaker.Session()
bucket_name = 'sagemaker-gacheon-ml2-team1'
print(bucket_name)
# bucket name format: "sagemaker-gacheon-{account 숫자}"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker-gacheon-ml2-team1


In [61]:
%store bucket_name

Stored 'bucket_name' (str)


In [62]:
# save data locally first
dest = 'ml-latest-small/s3'
train_x_path = os.path.join(dest, 'train_x.npy')
train_y_path = os.path.join(dest, 'train_y.npy')
dataset_columns_path = os.path.join(dest, 'dataset_columns.npy')
label_encode_path = os.path.join(dest, 'label_encode.npy')

!mkdir {dest}
np.save(train_x_path, X.values, allow_pickle=True)
np.save(train_y_path, y.values, allow_pickle=True)
np.save(dataset_columns_path, dataset_columns, allow_pickle=True)
np.save(label_encode_path, type_encoding_df.values, allow_pickle=True)

sagemaker_session.upload_data(train_x_path, bucket=bucket_name, key_prefix='data')
sagemaker_session.upload_data(train_y_path, bucket=bucket_name, key_prefix='data')
sagemaker_session.upload_data(dataset_columns_path, bucket=bucket_name, key_prefix='data')
sagemaker_session.upload_data(label_encode_path, bucket=bucket_name, key_prefix='data')

mkdir: cannot create directory ‘ml-latest-small/s3’: File exists


's3://sagemaker-gacheon-ml2-team1/data/label_encode.npy'