# Qiitaタグデータの整形

In [1]:
import os
import json
import ast
import pandas as pd

## データ取得

In [2]:
qiita_item_df = pd.read_csv('data/extracted_qiita_item.csv')

In [3]:
qiita_item_df.shape

(300, 17)

In [4]:
qiita_item_df.head(1)

Unnamed: 0,body,coediting,comments_count,created_at,group,id,likes_count,page_views_count,private,reactions_count,rendered_body,tags,team_membership,title,updated_at,url,user
0,NCMB（ニフクラ mobile backend）のSwift SDKをインストールする方法...,False,0,2021-08-07T11:27:55+09:00,,c95026861f160449a83b,0,,False,0,<p>NCMB（ニフクラ mobile backend）のSwift SDKをインストールす...,"[{'name': 'NCMB', 'versions': []}, {'name': 'S...",,NCMBのSwift SDKをSwift Packagesでインストールする,2021-08-07T11:27:55+09:00,https://qiita.com/goofmint/items/c95026861f160...,{'description': 'MOONGIFT CEO. ニフクラ mobile bac...


In [18]:
def create_qiita_item_to_tag_relation_df(qiita_item_df):
    """
    qiita_item_dfからqiita_item_to_tag_relation_dfを生成する
    """
    qiita_item_to_tag_relation_list = []
    for index, row in qiita_item_df.iterrows():
        qiita_item_series = qiita_item_df.iloc[index]
        qiita_item_id = qiita_item_series['id']
        qiita_item_tags_str = qiita_item_series['tags']
        qiita_item_tags = ast.literal_eval(qiita_item_tags_str)
        
        for qiita_item_tag_dict in qiita_item_tags:
            qiita_item_to_tag_relation_dict = {}
            qiita_item_to_tag_relation_dict['item_id'] = qiita_item_id
            qiita_item_to_tag_relation_dict['tag_name'] = qiita_item_tag_dict['name']
            qiita_item_to_tag_relation_list.append(qiita_item_to_tag_relation_dict)
    return pd.DataFrame.from_records(qiita_item_to_tag_relation_list)

In [37]:
def create_qiita_tag_df(qiita_item_df):
    """
    qiita_item_dfからqiita_tag_dfを生成する
    """
    qiita_tag_list = []
    for index, row in qiita_item_df.iterrows():
        qiita_item_series = qiita_item_df.iloc[index]
        qiita_item_tags_str = qiita_item_series['tags']
        qiita_item_tags = ast.literal_eval(qiita_item_tags_str)
        
        for qiita_item_tag in qiita_item_tags:
            qiita_tag_list.append(qiita_item_tag['name'])
    # タグデータはマスターとして扱うため重複を削除したいので、set型に変換する
    qiita_tag_set = set(qiita_tag_list)
    return pd.DataFrame(qiita_tag_set, columns =['tag_name'])

## 実行

### 投稿データとタグデータの関連データ

In [19]:
qiita_item_to_tag_relation_df = create_qiita_item_to_tag_relation_df(qiita_item_df)

In [20]:
qiita_item_to_tag_relation_df.shape

(855, 2)

In [21]:
qiita_item_to_tag_relation_df.head(1)

Unnamed: 0,item_id,tag_name
0,c95026861f160449a83b,NCMB


In [22]:
qiita_item_to_tag_relation_df.to_csv('result/qiita_item_to_tag_relation.csv', index=False, quoting=1, line_terminator='\r\n')

### タグデータ

In [38]:
qiita_tag_df = create_qiita_tag_df(qiita_item_df)

In [39]:
qiita_tag_df.shape

(489, 1)

In [40]:
qiita_tag_df.head(1)

Unnamed: 0,tag_name
0,インフラ


In [41]:
qiita_tag_df.to_csv('result/qiita_tag.csv', index=False, quoting=1, line_terminator='\r\n')

## 実験

### タグリストが取得できることの実験

In [5]:
tag_list_str = qiita_item_df.iloc[0, 11]

In [6]:
tag_list_str

"[{'name': 'NCMB', 'versions': []}, {'name': 'Swift', 'versions': []}, {'name': 'SwiftUI', 'versions': []}]"

In [7]:
tag_list = ast.literal_eval(tag_list_str)

In [8]:
tag_list

[{'name': 'NCMB', 'versions': []},
 {'name': 'Swift', 'versions': []},
 {'name': 'SwiftUI', 'versions': []}]

In [9]:
type(tag_list)

list

### データフレームの操作実験

In [13]:
qiita_item_series = qiita_item_df.iloc[0]

In [14]:
type(qiita_item_series)

pandas.core.series.Series

In [15]:
qiita_item_series

body                NCMB（ニフクラ mobile backend）のSwift SDKをインストールする方法...
coediting                                                       False
comments_count                                                      0
created_at                                  2021-08-07T11:27:55+09:00
group                                                             NaN
id                                               c95026861f160449a83b
likes_count                                                         0
page_views_count                                                  NaN
private                                                         False
reactions_count                                                     0
rendered_body       <p>NCMB（ニフクラ mobile backend）のSwift SDKをインストールす...
tags                [{'name': 'NCMB', 'versions': []}, {'name': 'S...
team_membership                                                   NaN
title                          NCMBのSwift SDKをSwift Packagesでインストールする
updated_at          

In [16]:
qiita_item_series['id']

'c95026861f160449a83b'

In [17]:
qiita_item_series['tags']

"[{'name': 'NCMB', 'versions': []}, {'name': 'Swift', 'versions': []}, {'name': 'SwiftUI', 'versions': []}]"