# kaggle 설치

https://www.kaggle.com/datasets/pranav941/hows-that-dog-for-me

In [2]:
# Install the Kaggle command-line client used below to download the dataset.
!pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
# Copy the Kaggle API token from Google Drive into ~/.kaggle/ for the kaggle CLI.
# NOTE: this needs Google Drive to be mounted first; the drive.mount cell (executed
# as In [3]) is shown further down in this notebook. The relative path
# drive/MyDrive presumably assumes the working directory is /content.
!cp -r drive/MyDrive/kaggle.json /content
!ls -1ha kaggle.json
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

kaggle.json


In [5]:
# Download the dataset archive (hows-that-dog-for-me.zip) with the Kaggle CLI.
!kaggle datasets download -d pranav941/hows-that-dog-for-me

Downloading hows-that-dog-for-me.zip to /content
  0% 0.00/7.62k [00:00<?, ?B/s]
100% 7.62k/7.62k [00:00<00:00, 20.6MB/s]


In [6]:
# Extract the archive; it contains Breed_Rank.csv and Breed_Traits.csv.
!unzip hows-that-dog-for-me.zip

Archive:  hows-that-dog-for-me.zip
  inflating: Breed_Rank.csv          
  inflating: Breed_Traits.csv        


In [3]:
from google.colab import drive

# Mount Google Drive at /content/drive. The cell that copies kaggle.json from
# drive/MyDrive depends on this mount, so this cell has to be run before it.
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import numpy as np
import pandas as pd

# 데이터 불러오기

In [8]:
# Load the per-breed trait table; the bare name on the last line displays it.
breed_favor = pd.read_csv("Breed_Traits.csv")
breed_favor

Unnamed: 0,Breed,Affectionate With Family,Good With Young Children,Good With Other Dogs,Shedding Level,Coat Grooming Frequency,Drooling Level,Coat Type,Coat Length,Openness To Strangers,Playfulness Level,Watchdog/Protective Nature,Adaptability Level,Trainability Level,Energy Level,Barking Level,Mental Stimulation Needs
0,Retrievers (Labrador),5,5,5,4,2,2,Double,Short,5,5,3,5,5,5,3,4
1,French Bulldogs,5,5,4,3,1,3,Smooth,Short,5,5,3,5,4,3,1,3
2,German Shepherd Dogs,5,5,3,4,2,2,Double,Medium,3,4,5,5,5,5,3,5
3,Retrievers (Golden),5,5,5,4,2,2,Double,Medium,5,4,3,5,5,3,1,4
4,Bulldogs,4,3,3,3,3,3,Smooth,Short,4,4,3,3,4,3,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Cesky Terriers,4,5,3,2,2,1,Wavy,Medium,4,3,3,4,3,3,3,3
191,American Foxhounds,3,5,5,3,1,1,Smooth,Short,3,3,3,3,3,4,5,3
192,Azawakhs,3,3,3,2,2,1,Smooth,Short,1,3,3,3,2,3,1,3
193,English Foxhounds,5,5,5,3,1,2,Double,Short,4,4,3,4,4,4,5,4


# 데이터 수정

## Breed 컬럼 값의 공백 문자 처리
- 견종 이름에 들어 있는 non-breaking space(`\xa0`)를 일반 공백 " "으로 수정

In [9]:
# Replace non-breaking spaces (\xa0) in breed names with regular spaces.
# regex=False makes this a literal replacement regardless of the pandas version
# (the default value of `regex` differs between pandas 1.x and 2.x).
breed_favor["Breed"] = breed_favor["Breed"].str.replace("\xa0", " ", regex=False)

## Coat Length
- Short, Medium, Long 를 5점 만점으로 변환하기 위해 각각 1, 3, 5점으로 변경

In [10]:
breed_favor["Coat Length"].unique() # to be mapped to scores 1, 3, 5

array(['Short', 'Medium', 'Long'], dtype=object)

In [11]:
breed_favor["Coat Type"].unique()   # this column will be dropped

array(['Double', 'Smooth', 'Curly', 'Silky', 'Wavy', 'Wiry', 'Hairless',
       'Rough', 'Corded'], dtype=object)

In [13]:
# Convert the "Coat Length" labels to numeric scores (Short/Medium/Long -> 1/3/5).
# NOTE: despite its name, coat_type_mapper maps Coat Length values; the name is
# kept so any other cell referring to it keeps working.
coat_type_mapper = {"Short": 1, "Medium": 3, "Long": 5}

# Only map while the column still holds labels, so re-running this cell is a
# no-op instead of a KeyError on the already-converted integers.
if not pd.api.types.is_numeric_dtype(breed_favor["Coat Length"]):
    coat_length_scores = breed_favor["Coat Length"].map(coat_type_mapper)
    if coat_length_scores.isna().any():
        # Keep the original strictness: an unmapped label is an error, not a NaN.
        unknown_labels = (
            breed_favor.loc[coat_length_scores.isna(), "Coat Length"].unique().tolist()
        )
        raise KeyError(f"Unknown Coat Length value(s): {unknown_labels}")
    breed_favor["Coat Length"] = coat_length_scores.astype("int64")
breed_favor["Coat Length"]

0      1
1      1
2      3
3      3
4      1
      ..
190    3
191    1
192    1
193    1
194    1
Name: Coat Length, Length: 195, dtype: int64

## Coat Type 항목은 삭제

In [14]:
# Remove the "Coat Type" column. Reassigning (instead of inplace=True) together
# with errors="ignore" keeps this cell re-runnable: a second run is a no-op
# rather than a KeyError for the already-missing column.
breed_favor = breed_favor.drop(columns="Coat Type", errors="ignore")
breed_favor.columns

Index(['Breed', 'Affectionate With Family', 'Good With Young Children',
       'Good With Other Dogs', 'Shedding Level', 'Coat Grooming Frequency',
       'Drooling Level', 'Coat Length', 'Openness To Strangers',
       'Playfulness Level', 'Watchdog/Protective Nature', 'Adaptability Level',
       'Trainability Level', 'Energy Level', 'Barking Level',
       'Mental Stimulation Needs'],
      dtype='object')

## Breed 항목을 index로 지정

In [15]:
# Print the column dtypes and non-null counts of the table.
breed_favor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 16 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Breed                       195 non-null    object
 1   Affectionate With Family    195 non-null    int64 
 2   Good With Young Children    195 non-null    int64 
 3   Good With Other Dogs        195 non-null    int64 
 4   Shedding Level              195 non-null    int64 
 5   Coat Grooming Frequency     195 non-null    int64 
 6   Drooling Level              195 non-null    int64 
 7   Coat Length                 195 non-null    int64 
 8   Openness To Strangers       195 non-null    int64 
 9   Playfulness Level           195 non-null    int64 
 10  Watchdog/Protective Nature  195 non-null    int64 
 11  Adaptability Level          195 non-null    int64 
 12  Trainability Level          195 non-null    int64 
 13  Energy Level                195 non-null    int64 

In [16]:
# Use the breed name as the row index.
# Guarded and non-inplace so that re-running the cell does not raise a KeyError
# once "Breed" has already been moved from the columns into the index.
if "Breed" in breed_favor.columns:
    breed_favor = breed_favor.set_index("Breed")
breed_favor

Unnamed: 0_level_0,Affectionate With Family,Good With Young Children,Good With Other Dogs,Shedding Level,Coat Grooming Frequency,Drooling Level,Coat Length,Openness To Strangers,Playfulness Level,Watchdog/Protective Nature,Adaptability Level,Trainability Level,Energy Level,Barking Level,Mental Stimulation Needs
Breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Retrievers (Labrador),5,5,5,4,2,2,1,5,5,3,5,5,5,3,4
French Bulldogs,5,5,4,3,1,3,1,5,5,3,5,4,3,1,3
German Shepherd Dogs,5,5,3,4,2,2,3,3,4,5,5,5,5,3,5
Retrievers (Golden),5,5,5,4,2,2,3,5,4,3,5,5,3,1,4
Bulldogs,4,3,3,3,3,3,1,4,4,3,3,4,3,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cesky Terriers,4,5,3,2,2,1,3,4,3,3,4,3,3,3,3
American Foxhounds,3,5,5,3,1,1,1,3,3,3,3,3,4,5,3
Azawakhs,3,3,3,2,2,1,1,1,3,3,3,2,3,1,3
English Foxhounds,5,5,5,3,1,2,1,4,4,3,4,4,4,5,4


# 추천 알고리즘 적용
- 아이템(견종) 간 최근접 이웃 방식의 콘텐츠 기반 필터링: 사용자 평점이 아니라 견종의 특성 점수 벡터로 유사도를 계산한다
- 견종은 이미 정해져있기 때문에 서비스 초반부터 적용해도 무리가 없다

## 견종들 간의 코사인유사도를 구함

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of the trait table (one row per breed).
item_sim = cosine_similarity(breed_favor, breed_favor)
item_sim

array([[1.        , 0.97646281, 0.96482459, ..., 0.95383065, 0.98029014,
        0.98352272],
       [0.97646281, 1.        , 0.93900048, ..., 0.9306615 , 0.94647203,
        0.94196146],
       [0.96482459, 0.93900048, 1.        , ..., 0.96891191, 0.95323464,
        0.97542674],
       ...,
       [0.95383065, 0.9306615 , 0.96891191, ..., 1.        , 0.93430931,
        0.95846768],
       [0.98029014, 0.94647203, 0.95323464, ..., 0.93430931, 1.        ,
        0.97874134],
       [0.98352272, 0.94196146, 0.97542674, ..., 0.95846768, 0.97874134,
        1.        ]])

In [18]:
# Label both axes of the similarity matrix with the breed names so that
# similarities can be looked up by name.
item_sim_df = pd.DataFrame(data=item_sim, index=breed_favor.index, columns=breed_favor.index)
item_sim_df

Breed,Retrievers (Labrador),French Bulldogs,German Shepherd Dogs,Retrievers (Golden),Bulldogs,Poodles,Beagles,Rottweilers,Pointers (German Shorthaired),Dachshunds,...,Chinooks,Finnish Spitz,Grand Basset Griffon Vendeens,Sloughis,Harriers,Cesky Terriers,American Foxhounds,Azawakhs,English Foxhounds,Norwegian Lundehunds
Breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Retrievers (Labrador),1.000000,0.976463,0.964825,0.974885,0.974105,0.921762,0.974220,0.944638,0.987237,0.961338,...,0.946372,0.956193,0.969395,0.983472,0.980290,0.961371,0.954907,0.953831,0.980290,0.983523
French Bulldogs,0.976463,1.000000,0.939000,0.977077,0.963749,0.897091,0.926572,0.950022,0.958353,0.932047,...,0.901972,0.910909,0.934649,0.945617,0.946472,0.945523,0.899864,0.930661,0.946472,0.941961
German Shepherd Dogs,0.964825,0.939000,1.000000,0.957289,0.953431,0.951561,0.942046,0.964165,0.982127,0.944141,...,0.952475,0.966955,0.958464,0.974949,0.953235,0.966399,0.930814,0.968912,0.953235,0.975427
Retrievers (Golden),0.974885,0.977077,0.957289,1.000000,0.960769,0.926389,0.936245,0.946268,0.961382,0.925109,...,0.935996,0.929287,0.947342,0.953646,0.943180,0.964013,0.908220,0.940772,0.943180,0.955134
Bulldogs,0.974105,0.963749,0.953431,0.960769,1.000000,0.930572,0.929341,0.956201,0.970262,0.957895,...,0.928371,0.929670,0.956774,0.949769,0.946829,0.940871,0.906226,0.940019,0.946829,0.971193
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Cesky Terriers,0.961371,0.945523,0.966399,0.964013,0.940871,0.971859,0.953637,0.908532,0.962869,0.951281,...,0.972792,0.973124,0.966616,0.945036,0.961616,1.000000,0.944414,0.936321,0.961616,0.966045
American Foxhounds,0.954907,0.899864,0.930814,0.908220,0.906226,0.893949,0.982876,0.869432,0.952892,0.950399,...,0.969657,0.969426,0.941786,0.947239,0.983767,0.944414,1.000000,0.918391,0.983767,0.966819
Azawakhs,0.953831,0.930661,0.968912,0.940772,0.940019,0.919239,0.948166,0.945992,0.965815,0.922325,...,0.935235,0.941584,0.931317,0.965575,0.934309,0.936321,0.918391,1.000000,0.934309,0.958468
English Foxhounds,0.980290,0.946472,0.953235,0.943180,0.946829,0.918821,0.980821,0.921967,0.978221,0.978808,...,0.965319,0.977008,0.967670,0.970665,1.000000,0.961616,0.983767,0.934309,1.000000,0.978741


In [19]:
# Show the breed names available as column labels of the similarity matrix.
item_sim_df.columns

Index(['Retrievers (Labrador)', 'French Bulldogs', 'German Shepherd Dogs',
       'Retrievers (Golden)', 'Bulldogs', 'Poodles', 'Beagles', 'Rottweilers',
       'Pointers (German Shorthaired)', 'Dachshunds',
       ...
       'Chinooks', 'Finnish Spitz', 'Grand Basset Griffon Vendeens',
       'Sloughis', 'Harriers', 'Cesky Terriers', 'American Foxhounds',
       'Azawakhs', 'English Foxhounds', 'Norwegian Lundehunds'],
      dtype='object', name='Breed', length=195)

In [20]:
# Five highest similarity scores for "Bulldogs"; the result includes Bulldogs
# itself (similarity 1.0), as the output below shows.
item_sim_df["Bulldogs"].sort_values(ascending=False)[:5]

Breed
Bulldogs                       1.000000
Setters (English)              0.981495
Greater Swiss Mountain Dogs    0.980857
Dalmatians                     0.979958
Spaniels (English Springer)    0.977542
Name: Bulldogs, dtype: float64

In [21]:
# Same lookup for another breed; the queried breed itself is again part of the result.
item_sim_df["Pointers (German Shorthaired)"].sort_values(ascending=False)[:5]

Breed
Pointers (German Shorthaired)    1.000000
Vizslas                          0.993951
Weimaraners                      0.991489
Wirehaired Vizslas               0.989250
Rat Terriers                     0.989203
Name: Pointers (German Shorthaired), dtype: float64

# 클래스로 구현

In [31]:
class BreedRecommender():
    """Recommend dog breeds whose trait scores are most similar to a given breed.

    On construction the trait table is read from CSV, cleaned, and converted into
    a breed-by-breed cosine-similarity matrix stored in ``self.recommend_df``
    (rows and columns are both labelled with breed names).
    """

    # Score assigned to each "Coat Length" label.
    COAT_LENGTH_SCORES = {"Short": 1, "Medium": 3, "Long": 5}

    def __init__(self, csv_path: str = "Breed_Traits.csv"):
        """Load the trait table and pre-compute the similarity matrix.

        Args:
            csv_path: Path of the trait CSV file. Defaults to "Breed_Traits.csv"
                in the current working directory.

        Raises:
            KeyError: If "Coat Length" contains a label that is not a key of
                ``COAT_LENGTH_SCORES``, or an expected column is missing.
        """
        breed_traits = pd.read_csv(csv_path)

        # Breed names contain non-breaking spaces; normalise them to plain spaces.
        breed_traits["Breed"] = breed_traits["Breed"].str.replace("\xa0", " ", regex=False)

        coat_length_scores = breed_traits["Coat Length"].map(self.COAT_LENGTH_SCORES)
        if coat_length_scores.isna().any():
            # .map() yields NaN for unknown labels; fail loudly instead.
            unknown_labels = (
                breed_traits.loc[coat_length_scores.isna(), "Coat Length"].unique().tolist()
            )
            raise KeyError(f"Unknown Coat Length value(s): {unknown_labels}")
        breed_traits["Coat Length"] = coat_length_scores.astype("int64")

        # Drop the "Coat Type" column and index the remaining columns by breed name.
        breed_traits = breed_traits.drop(columns="Coat Type").set_index("Breed")

        similarity = cosine_similarity(breed_traits, breed_traits)
        self.recommend_df = pd.DataFrame(
            data=similarity, index=breed_traits.index, columns=breed_traits.index
        )

    def recommend(self, breed_name: str, top_n: int = 5, include_self: bool = True) -> list:
        """Return the names of the breeds most similar to ``breed_name``.

        Args:
            breed_name: Breed to look up; must be a column label of ``recommend_df``.
            top_n: Maximum number of breed names to return. Defaults to 5.
            include_self: When True (the default), ``breed_name`` itself takes part
                in the ranking, so it normally appears in the result with
                similarity 1.0. Pass False to get only other breeds.

        Returns:
            Breed names ordered from most to least similar.

        Raises:
            KeyError: If ``breed_name`` is not a known breed.
            ValueError: If ``top_n`` is negative.
        """
        if top_n < 0:
            raise ValueError(f"top_n must be non-negative, got {top_n}")
        if breed_name not in self.recommend_df.columns:
            raise KeyError(f"Unknown breed: {breed_name!r}")

        scores = self.recommend_df[breed_name]
        if not include_self:
            # Remove by label rather than by position: another breed with an
            # identical trait row also scores 1.0 and could sort first.
            scores = scores.drop(index=breed_name)

        # A stable sort keeps tied breeds in their original table order.
        ranked = scores.sort_values(ascending=False, kind="mergesort")
        return ranked.iloc[:top_n].index.tolist()

In [32]:
# Build the recommender and query it for one breed as a quick check.
test = BreedRecommender()
test.recommend("Pointers (German Shorthaired)")

['Pointers (German Shorthaired)',
 'Vizslas',
 'Weimaraners',
 'Wirehaired Vizslas',
 'Rat Terriers']