In [278]:
import pandas as pd
import numpy as np
import json
import string
import textacy.preprocessing as tprep
from datetime import datetime, timezone
from app_store_scraper import AppStore
from pandas import json_normalize

from sklearn.feature_extraction.text import TfidfVectorizer

import spacy
nlp = spacy.load('en_core_web_sm')
from spacy.tokenizer import Tokenizer
from spacy.lang.en.stop_words import STOP_WORDS
# Treat review-specific filler words as stop words in addition to spaCy's defaults.
nlp.Defaults.stop_words |= {"find","found", "like", "feel", "recommend"}
# Keep the negation "no" as a content word. discard() is used instead of remove()
# so that re-running this cell in the same kernel (when "no" is already gone)
# does not raise KeyError.
nlp.Defaults.stop_words.discard("no")

In [279]:
def return_apple_reviews(app_name, app_id, max_reviews, country='us'):
    '''
    Scrape App Store reviews for one app and return them as a dataframe.

    Parameters
    ----------
    app_name : value forwarded to AppStore as ``app_name``.
    app_id : value forwarded to AppStore as ``app_id``.
    max_reviews : value forwarded to ``AppStore.review`` as ``how_many``.
    country : storefront code forwarded to AppStore (default ``'us'``).

    Returns
    -------
    DataFrame built from the scraper's ``reviews`` with these added columns:
    ``developer_id``, ``developer_response``, ``developer_response_modified``,
    ``developer_response_date``, ``customer_rating_date`` and
    ``days_until_responce`` (whole days between the customer review and the
    developer response; missing when there is no response).
    '''
    # Honour the caller's storefront instead of always querying the US store.
    app_reviews = AppStore(country=country, app_name=app_name, app_id=app_id)
    app_reviews.review(how_many=max_reviews)
    app_df = pd.DataFrame.from_dict(app_reviews.reviews)

    # Break the developer response (one nested record per review) into separate columns.
    # NOTE(review): the three target columns are filled positionally, which assumes
    # the normalized frame yields exactly id, body, modified in that order - confirm.
    app_df[['developer_id',
            'developer_response',
            'developer_response_modified']] = json_normalize(app_df['developerResponse'])

    # Clean date formats: parse the response timestamp and drop its timezone so it
    # can be subtracted from the review date below.
    app_df['developer_response_date'] = (
        pd.to_datetime(app_df['developer_response_modified']).dt.tz_localize(None)
    )
    app_df['customer_rating_date'] = pd.to_datetime(app_df['date'])

    # The raw 'developerResponse', 'developer_response_modified' and 'date' columns
    # are intentionally kept alongside the cleaned ones.

    # Metric creation: days between the review and the developer's response.
    # (Column name spelling is kept as-is because downstream cells reference it.)
    app_df['days_until_responce'] = (
        app_df['developer_response_date'] - app_df['customer_rating_date']
    ).dt.days

    return app_df

In [280]:
# Request 2000 reviews for the "found" app (App Store id 1581179653), default storefront.
df = return_apple_reviews(app_name='found', app_id=1581179653, max_reviews=2000)

2023-03-06 14:12:07,827 [INFO] Base - Initialised: AppStore('us', 'found', 1581179653)
2023-03-06 14:12:07,830 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/found/id1581179653
2023-03-06 14:12:08,273 [INFO] Base - [id:1581179653] Fetched 82 reviews (82 fetched in total)


In [281]:
def clean_ratings(text):
    """
    Normalise one review into a space-separated string of lowercase lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens that
    spaCy flags as stop words, or tags as ``PUNCT`` or ``NUM``, are dropped. Each
    remaining token contributes its lemma, lowercased and stripped of
    surrounding whitespace.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The kept lemmas joined by single spaces.
    """
    doc = nlp(text)

    cleaned_doc = []
    for token in doc:
        if token.is_stop or token.pos_ in ("PUNCT", "NUM"):
            continue
        lemma = token.lemma_.lower().strip()
        # Whitespace-only tokens (e.g. "\n\n" inside a review) strip down to "";
        # skipping them avoids empty entries and doubled spaces in the result.
        if lemma:
            cleaned_doc.append(lemma)

    return " ".join(cleaned_doc)

In [282]:
# Clean every review text element-wise and store the result next to the raw text.
df['customer_review_cleaned'] = df['review'].map(clean_ratings)

In [283]:
# TF-IDF configuration shared by the rating-group cells below:
#   - only bigrams and trigrams are considered,
#   - scikit-learn's built-in English stop-word list is applied,
#   - a term must occur in at least 2 documents and in at most 90% of them,
#   - at most 10 features are kept.
tfidf_settings = {
    "ngram_range": (2, 3),
    "stop_words": "english",
    "min_df": 2,
    "max_df": 0.9,
    "max_features": 10,
}
vectorizer = TfidfVectorizer(**tfidf_settings)

In [284]:
# Fit TF-IDF on the reviews rated 4 or higher and attach the scores to those rows.
high_rated = df[df['rating'] >= 4]
high_ratings_result = vectorizer.fit_transform(high_rated['customer_review_cleaned']).toarray()

# One column per selected n-gram, prefixed with "word_", indexed like the source
# rows so the concat below lines up row-for-row.
high_rate = pd.DataFrame(
    high_ratings_result,
    index=high_rated.index,
    columns=[f"word_{term}" for term in vectorizer.get_feature_names_out()],
)
high_reviews_df = pd.concat([high_rated, high_rate], axis=1)

In [285]:
# Inspect the "word_"-prefixed n-gram features fitted on the reviews rated 4 or higher.
high_rate.columns

Index(['word_app help', 'word_coach awesome', 'word_easy use',
       'word_help track', 'word_log routine', 'word_lose weight',
       'word_love app', 'word_love program', 'word_user friendly',
       'word_weight loss'],
      dtype='object')

In [286]:
# Display the reviews rated 4 or higher together with their TF-IDF n-gram scores.
high_reviews_df

Unnamed: 0,date,developerResponse,review,rating,isEdited,userName,title,developer_id,developer_response,developer_response_modified,...,word_app help,word_coach awesome,word_easy use,word_help track,word_log routine,word_lose weight,word_love app,word_love program,word_user friendly,word_weight loss
0,2022-10-04 22:43:00,"{'id': 32461912, 'body': 'Thank you for your 5...",I’ve been on the found program for approximate...,5,False,Dannieg3212,I love this! Im gaining confidence in myself🤍,32461912.0,Thank you for your 5-star rating! We are so ex...,2022-10-09T01:39:19Z,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2022-03-11 19:33:23,,Personalized program with the ongoing support ...,5,False,TiMarie83,Great program and Useful App,,,,...,0.0,0.0,0.0,0.0,0.0,0.904649,0.0,0.0,0.0,0.426158
5,2022-03-25 15:22:57,,The program is fine but I’m not a fan of the a...,4,False,lsduncan8,The program is good not a fan of the app,,,,...,0.0,0.0,0.0,0.0,0.0,0.727845,0.0,0.0,0.685741,0.0
8,2022-09-29 16:59:33,"{'id': 32328657, 'body': 'You made our day! Th...","I started Found in July 2022, by September I w...",4,False,candlegal,Finally got my appetite under control!!!,32328657.0,You made our day! Thank you for reviewing the ...,2022-09-30T23:27:47Z,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,2022-03-24 19:43:15,,Speaking in regard to the program holistically...,5,False,LadyLuluBelle,360 Weight + Health + Well-being - how refresh...,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15,2021-12-02 00:50:21,,I love that you can easily log your routines a...,5,False,Bettywhitesrus,User friendly app!,,,,...,0.0,0.0,0.0,0.0,0.727845,0.0,0.0,0.0,0.685741,0.0
16,2022-03-16 16:19:13,,This review is solely of the app. It’s a simpl...,4,False,Tx9999999,"Fairly good app, great program!",,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,2022-09-30 16:48:36,"{'id': 32410684, 'body': 'This is what we love...",Found has been a game changer for me. I’ve tri...,5,False,Shananigans07,Game changer,32410684.0,This is what we love to hear! We are cheering ...,2022-10-05T20:12:07Z,...,0.0,0.0,0.727845,0.0,0.0,0.0,0.0,0.0,0.0,0.685741
20,2022-04-13 20:53:43,"{'id': 29194132, 'body': 'Thank you so much fo...",This app is a fantastic tool to help you with ...,5,False,i write reviews.com,Awesome app!,29194132.0,Thank you so much for your thoughtful feedback...,2022-04-15T13:57:52Z,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
22,2021-12-13 22:48:09,,This new app works well with the key component...,5,False,Avid_ebook_reader,Good companion app to the program,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [287]:
# Refit the shared vectorizer on the reviews rated below 4. Note that this
# replaces the vocabulary it learned from the high-rating reviews.
low_rated = df[df['rating'] < 4]
low_ratings_result = vectorizer.fit_transform(low_rated['customer_review_cleaned']).toarray()

# Carry over the source row labels (as the high-rating cell does) so each score
# row can be matched back to its review in df instead of a fresh 0..n-1 index.
low_rate = pd.DataFrame(
    low_ratings_result,
    index=low_rated.index,
    columns=vectorizer.get_feature_names_out(),
)

In [288]:
# Inspect the n-gram features fitted on the reviews rated below 4.
low_rate.columns

Index(['lose weight', 'physical activity', 'prescribe medication',
       'pretty useless', 'program app', 'response support',
       'second medication', 'support team', 'weight app', 'weight loss'],
      dtype='object')

In [268]:
# Display the reviews rated below 4, including the cleaned review text.
df[df['rating']<4]

Unnamed: 0,date,developerResponse,review,rating,isEdited,userName,title,developer_id,developer_response,developer_response_modified,developer_response_date,customer_rating_date,days_until_responce,customer_review_cleaned
1,2022-09-11 11:54:54,"{'id': 31975629, 'body': 'We apologize for the...",I’m so discouraged by found. The so-called hea...,1,False,E0619,Preying on people needing support,31975629.0,We apologize for the less-than-ideal experienc...,2022-09-12T17:59:35Z,2022-09-12 17:59:35,2022-09-11 11:54:54,1.0,discourage call health coach barely check foll...
2,2023-02-15 17:39:12,"{'id': 34935885, 'body': 'Thank you for taking...",Absolutely do not recommend this company. Firs...,1,False,lizshagil,Do not recommend,34935885.0,Thank you for taking the time to leave a revie...,2023-02-17T22:50:34Z,2023-02-17 22:50:34,2023-02-15 17:39:12,2.0,absolutely not recommend company offer coach p...
4,2022-07-18 21:39:43,"{'id': 31022317, 'body': 'We're so sorry to he...",The doctors on this service are never consiste...,1,False,mis2008,"A dangerous, negligent scam",31022317.0,We're so sorry to hear about your experience. ...,2022-07-21T16:24:40Z,2022-07-21 16:24:40,2022-07-18 21:39:43,2.0,doctor service consistent disregard concern in...
6,2022-09-25 00:12:07,,I was targeted by ads that claimed they factor...,1,False,DOM CL,"No Support, Meds Sent me to Urgent Care",,,,NaT,2022-09-25 00:12:07,,target ad claim factor genetic dieting keep ge...
7,2022-07-18 16:46:12,"{'id': 30985564, 'body': 'Our team did not rec...",If I could rate zero stars I would. \n\nI left...,1,False,authykat,Update: support will not answer,30985564.0,"Our team did not receive a response, but we de...",2022-07-20T04:13:53Z,2022-07-20 04:13:53,2022-07-18 16:46:12,1.0,rate star leave review state issue day refund...
9,2022-09-04 22:00:49,"{'id': 31975490, 'body': 'We're so sorry to he...",This app claims to have made a bunch of change...,3,False,coupfourré,Liked it better before,31975490.0,We're so sorry to hear about your experience. ...,2022-09-12T17:53:36Z,2022-09-12 17:53:36,2022-09-04 22:00:49,7.0,app claim bunch change recently ditch coach ne...
10,2021-12-01 17:16:43,,"For a first version, it’s ok. But it’s not Int...",3,False,Drama77,Not intuitive,,,,NaT,2021-12-01 17:16:43,,version ok not intuitive main thing need track...
11,2022-09-25 14:13:43,"{'id': 32021270, 'body': 'We apologize for the...",I CANCELED AND THEY CHARGED EVEN TOUGH AND DID...,1,True,Anamilli,Not that easy and doctors don’t answer questions,32021270.0,We apologize for the less-than-ideal experienc...,2022-09-25T14:13:44Z,2022-09-25 14:13:44,2022-09-25 14:13:43,0.0,cancel charge tough didn not give reimbursemen...
12,2022-05-04 17:32:55,"{'id': 29603318, 'body': 'Thank you so much fo...","Works just fine, but it’s a little clunky to o...",3,False,Socketwrench,It’s Okay,29603318.0,Thank you so much for this valuable feedback. ...,2022-05-06T21:56:27Z,2022-05-06 21:56:27,2022-05-04 17:32:55,2.0,work fine little clunky operate sync exercise ...
14,2022-06-22 15:46:23,"{'id': 30647366, 'body': 'I apologize for the ...",My experience with Found was not positive and ...,2,False,Alarmed-User65,Not A Good Fit,30647366.0,I apologize for the less-than-ideal experience...,2022-06-30T17:15:21Z,2022-06-30 17:15:21,2022-06-22 15:46:23,8.0,experience not positive cancel membership ini...


In [60]:
# NOTE(review): `vectors` is not defined in any cell visible here, and this cell's
# execution count (60) is out of sequence with the cells above - confirm where it
# comes from before relying on this cell after a kernel restart.
# Convert `vectors` (presumably a sparse matrix - verify) to a dense matrix,
# then to nested Python lists.
dense = vectors.todense()
denselist = dense.tolist()