# Scraping data from Trusted Shops - Alternative program

In [1]:
# Import required packages
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import json
import ast

## Function to collect reviews data for a given supplier

In [2]:
def get_reviews(url, n_pages, file, timeout=30):
    """Collect the reviews listed on the first ``n_pages`` result pages of a
    Trusted Shops profile and save them to ``<file>.csv``.

    Each page ``url?page=i`` (``i`` from 1 to ``n_pages``) is downloaded, the
    JSON text held by the first ``<script>`` element of the page body is
    decoded, and the reviews are read from
    ``['props']['pageProps']['reviews']['reviews']``.

    Parameters
    ----------
    url : str
        Address of the shop's review page, without any ``?page=`` query.
    n_pages : int
        Number of result pages to fetch, starting at page 1.
    file : str
        Output path without extension; ``'.csv'`` is appended.
    timeout : float, optional
        Seconds to wait for each HTTP response (default 30).

    Returns
    -------
    None
        The reviews are written to ``<file>.csv`` with ``;`` as separator and
        the review ``id`` column as index.

    Raises
    ------
    requests.HTTPError
        If a page answers with an HTTP error status.
    ValueError
        If a page body holds no script payload, or if no review was collected
        at all (nothing is written in that case).
    """
    frames = []

    # A single session reuses the HTTP connection across the page requests.
    with requests.Session() as session:
        for i in range(1, n_pages + 1):
            page_url = url + "?page={}".format(i)

            # Without a timeout a stalled connection would block the run forever.
            req = session.get(page_url, timeout=timeout)
            # Fail loudly on 4xx/5xx instead of trying to parse an error page.
            req.raise_for_status()

            # Name the parser explicitly so the result does not depend on which
            # optional parsers happen to be installed.
            soup = BeautifulSoup(req.content, "html.parser")

            # The targeted data sits in the first script of the body.
            script = soup.body.script if soup.body is not None else None
            if script is None or not script.contents:
                raise ValueError(
                    "No script payload found in the body of {}".format(page_url)
                )
            raw0 = json.loads(script.contents[0])
            reviews0 = raw0['props']['pageProps']['reviews']['reviews']

            # Keep one DataFrame per page; they are concatenated once after the
            # loop instead of re-copying the accumulated data at every page.
            frames.append(pd.DataFrame(reviews0))

            # Progress trace to follow the execution.
            print("Page", i)

    # Pages without any review contribute nothing and are left out.
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        raise ValueError("No reviews collected from {}".format(url))
    data = pd.concat(non_empty)

    # Use the review identifier as index.
    data = data.set_index("id")

    # Save the collected data to a csv file.
    data.to_csv(file + '.csv', sep=";")


### Test on a single page of results

In [214]:
# Smoke test: scrape one page of results, then reload the CSV that was written.
url = "https://www.trustedshops.fr/evaluation/info_XA3F5BBDE34B5AD3FB64015EB823AC7C6.html"
n_pages = 1
file = "test0"
get_reviews(url, n_pages, file)

# Read the saved file back, using its first column (the review id) as index.
df = pd.read_csv("{}.csv".format(file), sep=";", index_col=0)
df.head(1)


Page 1


Unnamed: 0_level_0,rating,title,comment,createdAt,reply,customer,transaction,totalLikeCount,provider,inModeration,verificationStatus
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rev-ac07cd9d-a2ba-4f62-835f-54b7fcc11c92,4,Bonjour,Bonjour\n\nJ'avoue que j'appréhendais que mon ...,1694849514000,"{'createdAt': 1696661962000, 'comment': ""Bonjo...",{},{'date': 1693785600000},1.0,Trusted Shops GmbH,False,MEMBER_VERIFIED


### Collect raw review data

In [3]:
# Reviews of "RAYON D'OR BAGAGES".
# get_reviews issues one HTTP request per page, so the CSV it writes is used as
# a cache: it is loaded when present and the site is queried only when the file
# is missing (delete the file to force a fresh scrape).
url = "https://www.trustedshops.fr/evaluation/info_X5877FAE851EE366564CE1EC1604DF2B0.html"
n_pages = 55
file = "rayonOrBags_raw"
csv_path = file + ".csv"
try:
    df = pd.read_csv(csv_path, sep=";", index_col=0)
except FileNotFoundError:
    # No saved scrape yet: collect the reviews once, then load them.
    get_reviews(url, n_pages, file)
    df = pd.read_csv(csv_path, sep=";", index_col=0)
display(df.head())
df.info()

Unnamed: 0_level_0,rating,title,comment,createdAt,reply,customer,transaction,provider,inModeration,verificationStatus,totalLikeCount,updatedAt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rev-a2bf124b-dfd2-4ea9-ad92-75a7ace2ed61,5,Objet Conforme et prix imbattable,"Franchement, vu le prix, j'ai pensé à une arna...",1696596179000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,,
rev-4ded3142-1b00-48ac-8dc2-b1df7c34f24e,5,Achat très facile sur le site,Achat très facile sur le site. On bénéficie de...,1694722361000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,,
rev-3e01aed1-74ca-4d1f-91c6-7b36c904d68a,4,Livraison très rapide de l'article…,Livraison très rapide de l'article commandé !\...,1695924973000,{'comment': ''},"{'id': '0803644f8b1292951a16116b069e0055', 'fi...",{},Trusted Shops GmbH,False,MEMBER_VERIFIED,,
rev-ff51282e-0601-47e4-8384-c2d598b2fae6,5,Valise cabine Samsonite silver,Nous avons reçu notre valise dans un délai inf...,1695786724000,{'comment': ''},"{'id': '485fed5bd5d1af7a06eef10c07512296', 'fi...",{},Trusted Shops GmbH,False,MEMBER_VERIFIED,,
rev-98884d0e-8be6-4080-a3d2-2c10b7ea8db1,5,Prise en compte quasi immédiate de la…,Prise en compte quasi immédiate de la non livr...,1696876196000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,,


<class 'pandas.core.frame.DataFrame'>
Index: 1092 entries, rev-a2bf124b-dfd2-4ea9-ad92-75a7ace2ed61 to rev-2efc7fd6-311c-4f90-8b4d-9748aec7fc09
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   rating              1092 non-null   int64  
 1   title               741 non-null    object 
 2   comment             734 non-null    object 
 3   createdAt           1092 non-null   int64  
 4   reply               1092 non-null   object 
 5   customer            1092 non-null   object 
 6   transaction         1092 non-null   object 
 7   provider            1092 non-null   object 
 8   inModeration        1092 non-null   bool   
 9   verificationStatus  1092 non-null   object 
 10  totalLikeCount      16 non-null     float64
 11  updatedAt           2 non-null      float64
dtypes: bool(1), float64(2), int64(2), object(7)
memory usage: 103.4+ KB


In [6]:
# Reviews of "LA REDOUTE".
# get_reviews issues one HTTP request per page (1738 here), so the CSV it writes
# is used as a cache: it is loaded when present and the site is queried only
# when the file is missing (delete the file to force a fresh scrape).
url = "https://www.trustedshops.fr/evaluation/info_XA3F5BBDE34B5AD3FB64015EB823AC7C6.html"
n_pages = 1738
file = "redoute_raw"
csv_path = file + ".csv"
try:
    df = pd.read_csv(csv_path, sep=";", index_col=0)
except FileNotFoundError:
    # No saved scrape yet: collect the reviews once, then load them.
    get_reviews(url, n_pages, file)
    df = pd.read_csv(csv_path, sep=";", index_col=0)
display(df.head())
df.info()

Page 1
Page 2
Page 3
Page 4
Page 5
Page 6
Page 7
Page 8
Page 9
Page 10
Page 11
Page 12
Page 13
Page 14
Page 15
Page 16
Page 17
Page 18
Page 19
Page 20
Page 21
Page 22
Page 23
Page 24
Page 25
Page 26
Page 27
Page 28
Page 29
Page 30
Page 31
Page 32
Page 33
Page 34
Page 35
Page 36
Page 37
Page 38
Page 39
Page 40
Page 41
Page 42
Page 43
Page 44
Page 45
Page 46
Page 47
Page 48
Page 49
Page 50
Page 51
Page 52
Page 53
Page 54
Page 55
Page 56
Page 57
Page 58
Page 59
Page 60
Page 61
Page 62
Page 63
Page 64
Page 65
Page 66
Page 67
Page 68
Page 69
Page 70
Page 71
Page 72
Page 73
Page 74
Page 75
Page 76
Page 77
Page 78
Page 79
Page 80
Page 81
Page 82
Page 83
Page 84
Page 85
Page 86
Page 87
Page 88
Page 89
Page 90
Page 91
Page 92
Page 93
Page 94
Page 95
Page 96
Page 97
Page 98
Page 99
Page 100
Page 101
Page 102
Page 103
Page 104
Page 105
Page 106
Page 107
Page 108
Page 109
Page 110
Page 111
Page 112
Page 113
Page 114
Page 115
Page 116
Page 117
Page 118
Page 119
Page 120
Page 121
Page 122
Page 123
P

Unnamed: 0_level_0,rating,title,comment,createdAt,reply,customer,transaction,totalLikeCount,provider,inModeration,verificationStatus,updatedAt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rev-ac07cd9d-a2ba-4f62-835f-54b7fcc11c92,4,Bonjour,Bonjour\n\nJ'avoue que j'appréhendais que mon ...,1694849514000,"{'createdAt': 1696661962000, 'comment': ""Bonjo...",{},{'date': 1693785600000},1.0,Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-cc907a30-4fb0-4e55-a4a1-307b89eb0d99,4,Facilte,"Facilte, sollicitations dosées...qualité..on p...",1696515522000,{'comment': ''},{},{'date': 1695427200000},,Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-b54467c4-4769-42e3-a74a-c72c000059f9,4,Jolie table mais plus pour 6 que pour 8,Livraison impeccable avec possibilité de prend...,1696252107000,{'comment': ''},{},{'date': 1694390400000},,Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-b480f537-524e-491e-9f85-74405332c047,4,Achat d'un canapé La Redoute intérieurs…,Achat d'un canapé La Redoute intérieurs le 6/9...,1695055941000,{'comment': ''},{},{'date': 1693958400000},,Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-f04626f5-846d-41ca-9866-46573746f75b,4,excellent site mais des changements qui compl...,je commande depuis longtemps chez la redoute; ...,1697187436000,{'comment': ''},{},{'date': 1696204800000},,Trusted Shops GmbH,False,MEMBER_VERIFIED,


<class 'pandas.core.frame.DataFrame'>
Index: 34753 entries, rev-ac07cd9d-a2ba-4f62-835f-54b7fcc11c92 to rev-de0535f2-1569-4d2c-b3d7-95db2e8d025e
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   rating              34753 non-null  int64  
 1   title               27560 non-null  object 
 2   comment             27279 non-null  object 
 3   createdAt           34753 non-null  int64  
 4   reply               34753 non-null  object 
 5   customer            34753 non-null  object 
 6   transaction         34753 non-null  object 
 7   totalLikeCount      8 non-null      float64
 8   provider            34753 non-null  object 
 9   inModeration        34753 non-null  bool   
 10  verificationStatus  34753 non-null  object 
 11  updatedAt           32 non-null     float64
dtypes: bool(1), float64(2), int64(2), object(7)
memory usage: 3.2+ MB


In [5]:
# Reviews of MECHATECHNIC FR.
# get_reviews issues one HTTP request per page (430 here), so the CSV it writes
# is used as a cache: it is loaded when present and the site is queried only
# when the file is missing (delete the file to force a fresh scrape).
url = "https://www.trustedshops.fr/evaluation/info_X7CF89FDC3EBA9A38117A700B8AA37DCB.html"
n_pages = 430
file = "mechatech_raw"
csv_path = file + ".csv"
try:
    df = pd.read_csv(csv_path, sep=";", index_col=0)
except FileNotFoundError:
    # No saved scrape yet: collect the reviews once, then load them.
    get_reviews(url, n_pages, file)
    df = pd.read_csv(csv_path, sep=";", index_col=0)
display(df.head())
df.info()

Page 1
Page 2
Page 3
Page 4
Page 5
Page 6
Page 7
Page 8
Page 9
Page 10
Page 11
Page 12
Page 13
Page 14
Page 15
Page 16
Page 17
Page 18
Page 19
Page 20
Page 21
Page 22
Page 23
Page 24
Page 25
Page 26
Page 27
Page 28
Page 29
Page 30
Page 31
Page 32
Page 33
Page 34
Page 35
Page 36
Page 37
Page 38
Page 39
Page 40
Page 41
Page 42
Page 43
Page 44
Page 45
Page 46
Page 47
Page 48
Page 49
Page 50
Page 51
Page 52
Page 53
Page 54
Page 55
Page 56
Page 57
Page 58
Page 59
Page 60
Page 61
Page 62
Page 63
Page 64
Page 65
Page 66
Page 67
Page 68
Page 69
Page 70
Page 71
Page 72
Page 73
Page 74
Page 75
Page 76
Page 77
Page 78
Page 79
Page 80
Page 81
Page 82
Page 83
Page 84
Page 85
Page 86
Page 87
Page 88
Page 89
Page 90
Page 91
Page 92
Page 93
Page 94
Page 95
Page 96
Page 97
Page 98
Page 99
Page 100
Page 101
Page 102
Page 103
Page 104
Page 105
Page 106
Page 107
Page 108
Page 109
Page 110
Page 111
Page 112
Page 113
Page 114
Page 115
Page 116
Page 117
Page 118
Page 119
Page 120
Page 121
Page 122
Page 123
P

Unnamed: 0_level_0,rating,title,comment,createdAt,reply,customer,transaction,provider,inModeration,verificationStatus,totalLikeCount
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rev-40400ab0-b30b-4389-8afa-adf3c5666622,5,Site ou téléphone ?,"Bonjour, \nD'habitude je passe commande par té...",1696239870000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-11d4fcd8-34da-48ae-8ce3-0dd15055b0d9,5,Cela fait pus de 15 ans que je commande…,Cela fait pus de 15 ans que je commande des pi...,1696011857000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-f3b26c2d-2282-433d-89ce-34f4e9f37234,5,total satisfaction,total satisfaction ! piece conforme à l origin...,1696762213000,{'comment': ''},{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-82969238-1c63-4e31-acb5-8665bfc490a0,4,Bien mais communication écrite à parfaire.,La possibilité de téléphoner pour confirmer un...,1695070187000,"{'createdAt': 1695647641000, 'comment': 'Merci...",{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,
rev-8a661eb7-b6d9-4f9e-b25a-4bb58a65dbb2,4,Bonjour je suis satisfait de l’envoi de…,Bonjour je suis satisfait de l’envoi de ma piè...,1695583333000,"{'createdAt': 1695648121000, 'comment': ""Merci...",{},{},Trusted Shops GmbH,False,MEMBER_VERIFIED,1.0


<class 'pandas.core.frame.DataFrame'>
Index: 6245 entries, rev-40400ab0-b30b-4389-8afa-adf3c5666622 to rev-dfdbb6b1-4be5-4554-8544-2d8fc2812e67
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   rating              6245 non-null   int64  
 1   title               3208 non-null   object 
 2   comment             3178 non-null   object 
 3   createdAt           6245 non-null   int64  
 4   reply               6245 non-null   object 
 5   customer            6245 non-null   object 
 6   transaction         6245 non-null   object 
 7   provider            6245 non-null   object 
 8   inModeration        6245 non-null   bool   
 9   verificationStatus  6245 non-null   object 
 10  totalLikeCount      3 non-null      float64
dtypes: bool(1), float64(1), int64(2), object(7)
memory usage: 542.8+ KB
