# Qiitaユーザーデータの整形

In [18]:
import os
import json
import ast
import pandas as pd

## データ取得

In [6]:
# Load the extracted Qiita item data from CSV into a DataFrame.
qiita_item_df = pd.read_csv('data/extracted_qiita_item.csv')

In [7]:
# Show the (rows, columns) dimensions of the loaded item DataFrame.
qiita_item_df.shape

(300, 17)

In [8]:
# Display the first row of the item DataFrame.
qiita_item_df.head(1)

Unnamed: 0,body,coediting,comments_count,created_at,group,id,likes_count,page_views_count,private,reactions_count,rendered_body,tags,team_membership,title,updated_at,url,user
0,NCMB（ニフクラ mobile backend）のSwift SDKをインストールする方法...,False,0,2021-08-07T11:27:55+09:00,,c95026861f160449a83b,0,,False,0,<p>NCMB（ニフクラ mobile backend）のSwift SDKをインストールす...,"[{'name': 'NCMB', 'versions': []}, {'name': 'S...",,NCMBのSwift SDKをSwift Packagesでインストールする,2021-08-07T11:27:55+09:00,https://qiita.com/goofmint/items/c95026861f160...,{'description': 'MOONGIFT CEO. ニフクラ mobile bac...


## 関数定義

In [22]:
def extract_user_id(user_dict_str):
    """
    Extract the Qiita user ID from a stringified user dictionary.

    The string is parsed into a Python object with ``ast.literal_eval``
    and the value stored under its ``'id'`` key is returned.
    """
    return ast.literal_eval(user_dict_str)['id']

In [33]:
def create_qiita_user_df(qiita_item_df):
    """
    Build a user DataFrame from the ``'user'`` column of ``qiita_item_df``.

    Every value of the column is parsed with ``ast.literal_eval`` and the
    parsed records become the rows of the returned DataFrame, in the same
    order as the input column (one row per input row; no de-duplication).
    """
    user_records = [
        ast.literal_eval(raw_user)
        for raw_user in qiita_item_df['user'].to_list()
    ]
    return pd.DataFrame.from_records(user_records)

## 実行

### ユーザーID取得

In [24]:
# Add a 'user_id' column by applying extract_user_id to each value of the 'user' column.
# NOTE: this mutates qiita_item_df in place.
qiita_item_df['user_id'] = qiita_item_df['user'].map(extract_user_id)

In [25]:
# Create the output directory if it is missing so the export works on a fresh checkout.
os.makedirs('result', exist_ok=True)
# Write the items (now including 'user_id') without the index, quoting every
# field (quoting=1 is csv.QUOTE_ALL) and using CRLF line endings.
# `lineterminator` replaces `line_terminator`, which was deprecated in
# pandas 1.5 and removed in pandas 2.0 (requires pandas >= 1.5).
qiita_item_df.to_csv('result/qiita_item_user_id.csv', index=False, quoting=1, lineterminator='\r\n')

### ユーザーデータ取得

In [34]:
# Build the user DataFrame from the 'user' column of the item DataFrame.
qiita_user_df = create_qiita_user_df(qiita_item_df)

In [35]:
# Show the (rows, columns) dimensions of the user DataFrame.
qiita_user_df.shape

(300, 16)

In [36]:
# Display the first row of the user DataFrame.
qiita_user_df.head(1)

Unnamed: 0,description,facebook_id,followees_count,followers_count,github_login_name,id,items_count,linkedin_id,location,name,organization,permanent_id,profile_image_url,team_only,twitter_screen_name,website_url
0,MOONGIFT CEO. ニフクラ mobile backend & FRONT-END....,goofmint,1,43,,goofmint,107,goofmint,"Yokohama, Japan",中津川 篤司,MOONGIFT,197026,https://qiita-image-store.s3.amazonaws.com/0/1...,False,goofmint,https://www.moongift.jp/


In [37]:
# Create the output directory if it is missing so the export works on a fresh checkout.
os.makedirs('result', exist_ok=True)
# Write the user data without the index, quoting every field
# (quoting=1 is csv.QUOTE_ALL) and using CRLF line endings.
# `lineterminator` replaces `line_terminator`, which was deprecated in
# pandas 1.5 and removed in pandas 2.0 (requires pandas >= 1.5).
qiita_user_df.to_csv('result/qiita_user.csv', index=False, quoting=1, lineterminator='\r\n')

In [38]:
# List the column labels of the user DataFrame.
qiita_user_df.columns

Index(['description', 'facebook_id', 'followees_count', 'followers_count',
       'github_login_name', 'id', 'items_count', 'linkedin_id', 'location',
       'name', 'organization', 'permanent_id', 'profile_image_url',
       'team_only', 'twitter_screen_name', 'website_url'],
      dtype='object')

## 実験

### ユーザーID取得実験

In [13]:
# Take the raw 'user' value of the first row. The column is selected by name
# instead of by position (16) so the lookup does not depend on column order.
user_dict_str = qiita_item_df['user'].iloc[0]

In [14]:
# Display the raw value taken from the first row.
user_dict_str

"{'description': 'MOONGIFT CEO. ニフクラ mobile backend & FRONT-END.AI エバンジェリスト', 'facebook_id': 'goofmint', 'followees_count': 1, 'followers_count': 43, 'github_login_name': None, 'id': 'goofmint', 'items_count': 107, 'linkedin_id': 'goofmint', 'location': 'Yokohama, Japan', 'name': '中津川 篤司', 'organization': 'MOONGIFT', 'permanent_id': 197026, 'profile_image_url': 'https://qiita-image-store.s3.amazonaws.com/0/197026/profile-images/1514355124', 'team_only': False, 'twitter_screen_name': 'goofmint', 'website_url': 'https://www.moongift.jp/'}"

In [15]:
# Check the Python type of the raw value before parsing.
type(user_dict_str)

str

In [19]:
# Parse the stringified dictionary into a Python object with ast.literal_eval.
user_dict = ast.literal_eval(user_dict_str)

In [20]:
# Display the parsed result.
user_dict

{'description': 'MOONGIFT CEO. ニフクラ mobile backend & FRONT-END.AI エバンジェリスト',
 'facebook_id': 'goofmint',
 'followees_count': 1,
 'followers_count': 43,
 'github_login_name': None,
 'id': 'goofmint',
 'items_count': 107,
 'linkedin_id': 'goofmint',
 'location': 'Yokohama, Japan',
 'name': '中津川 篤司',
 'organization': 'MOONGIFT',
 'permanent_id': 197026,
 'profile_image_url': 'https://qiita-image-store.s3.amazonaws.com/0/197026/profile-images/1514355124',
 'team_only': False,
 'twitter_screen_name': 'goofmint',
 'website_url': 'https://www.moongift.jp/'}

In [21]:
# Check the Python type of the parsed result.
type(user_dict)

dict

In [23]:
# Try extract_user_id on the sample string taken from the first row.
extract_user_id(user_dict_str)

'goofmint'

### ユーザーデータ取得実験

In [29]:
# Collect every value of the 'user' column into a plain Python list.
qiita_user_str_list = qiita_item_df['user'].to_list()

In [30]:
# Parse each entry of qiita_user_str_list with ast.literal_eval and collect the results.
# This is the same loop that create_qiita_user_df runs internally.
qiita_user_dict_list = []
for qiita_user_str in qiita_user_str_list:
    qiita_user_dict = ast.literal_eval(qiita_user_str)
    qiita_user_dict_list.append(qiita_user_dict)

In [31]:
# Count the parsed records.
len(qiita_user_dict_list)

300

In [32]:
# Preview the first three parsed records.
qiita_user_dict_list[:3]

[{'description': 'MOONGIFT CEO. ニフクラ mobile backend & FRONT-END.AI エバンジェリスト',
  'facebook_id': 'goofmint',
  'followees_count': 1,
  'followers_count': 43,
  'github_login_name': None,
  'id': 'goofmint',
  'items_count': 107,
  'linkedin_id': 'goofmint',
  'location': 'Yokohama, Japan',
  'name': '中津川 篤司',
  'organization': 'MOONGIFT',
  'permanent_id': 197026,
  'profile_image_url': 'https://qiita-image-store.s3.amazonaws.com/0/197026/profile-images/1514355124',
  'team_only': False,
  'twitter_screen_name': 'goofmint',
  'website_url': 'https://www.moongift.jp/'},
 {'description': '元・組込みミドルウェアエンジニア.\r\n現・DevOpsおよびDjangoバッグエンドエンジニア.\r\n保有資格: LPIC3\r\n\r\n\r\n\r\n',
  'facebook_id': '',
  'followees_count': 0,
  'followers_count': 1,
  'github_login_name': None,
  'id': 'robozushi10',
  'items_count': 63,
  'linkedin_id': '',
  'location': '',
  'name': 'ロボ 寿司',
  'organization': '',
  'permanent_id': 1246056,
  'profile_image_url': 'https://s3-ap-northeast-1.amazonaws.com/qiita-image