In [1]:
# =======================================
# REDDIT DATA SCRAPER FOR FINTECH THESIS
# =======================================

In [1]:
# 1. Package Installation
# -----------------------
# Install `praw` for Reddit API interaction
!pip install praw



In [3]:
# 2. Import Required Libraries
# ----------------------------
import praw  # Reddit API wrapper
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
from IPython.display import display  # For nicer rendering in notebooks

In [4]:
# 3. Reddit API Authentication
# ----------------------------
# Write personal Reddit app credentials
CLIENT_ID = "...." 
CLIENT_SECRET = "...."  
USER_AGENT = "..."

# Initialize Reddit instance using PRAW 
reddit = praw.Reddit(
    client_id="...",  
    client_secret="....",  
    user_agent="....",  
)

In [5]:
# URL for getting an access token
token_url = "https://www.reddit.com/api/v1/access_token"

# Payload (parameters for authentication)
data = {
    "grant_type": "client_credentials"
}

# Headers
headers = {
    "User-Agent": USER_AGENT
}

# Send request
response = requests.post(token_url, 
                         auth=HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET),
                         data=data,
                         headers=headers)

# Print response
if response.status_code == 200:
    print("Successfully obtained an access token")
    print("Access Token:", response.json()["access_token"])
else:
    print("Failed to authenticate.")
    print("Response:", response.json())


Successfully obtained an access token
Access Token: eyJhbGciOiJSUzI1NiIsImtpZCI6IlNIQTI1NjpzS3dsMnlsV0VtMjVmcXhwTU40cWY4MXE2OWFFdWFyMnpLMUdhVGxjdWNZIiwidHlwIjoiSldUIn0.eyJzdWIiOiJsb2lkIiwianRpIjoiY1c1SXBxOHhrQ2FlcVBMdkF6bHBsZGNxN3VWbDF3IiwiZXhwIjoxNzQ5MDQzNjU5LjAzMTQwOSwiaWF0IjoxNzQ4OTU3MjU5LjAzMTQwOSwiY2lkIjoiNGh4aGZMcUFLRXRmQ0JTWXAtTFdGZyIsImxpZCI6InQyXzFxcTNkNXV0cGIiLCJzY3AiOiJlSnlLVnRKU2lnVUVBQURfX3dOekFTYyIsImxjYSI6MTc0ODk1NzI1OTAxNiwiZmxvIjo2fQ.Qsl6WYrMWlCtODyPIVuACdRDil25RkeHUW6heBmPeR_hnrlliXd9j3cgjBC_bulf4K9xWQJ0dJEWr0lBMgn7CM_mGX4tv0Em6GzrrqNKW6iEa1Rg-wnOhBXfL-TBbXL7OTBkAm4_iGLV0oYifOHoC57PPQW9i_wtIO91WgPkacTQNRbeBJgpUvoI1OjTD30-anrRpEX5bI6_Bbme8s5_O4uhBOBCN60IOZniBg2VEysPiSbosN93Wftf63zUR_1CjbgqJOFNlUsoAyE4Ec3enLzJQuuPBetr1doEmcwMbq0Yc3LLKFGZYPlEr8MjTvGPY2QJu0Clohuj6bAx2qguqg


In [6]:
# 4. Extract Comments from a Specific Thread
# ------------------------------------------
# Target example: Revolut discussion thread
url = "https://www.reddit.com/r/Netherlands/comments/1d2efyh/revolut_in_the_netherlands/"
submission = reddit.submission(url=url)

In [7]:
# Expand all comments
submission.comments.replace_more(limit=None)

# Extract comments into a list
comments = [comment.body for comment in submission.comments.list()]

In [8]:
# Convert to a DataFrame
df = pd.DataFrame(comments, columns=["comment"])

# Display comments
print(df.head())

                                             comment
0  As many other banks that claim to be neo-banks...
1  Yeah would also not advice to go all in on Rev...
2  While I never had problem with revolut blockin...
3  Having an online bank as your primary bank is ...
4  I am using Revolt for traveling and paying for...


In [9]:
display(df) 

Unnamed: 0,comment
0,As many other banks that claim to be neo-banks...
1,Yeah would also not advice to go all in on Rev...
2,While I never had problem with revolut blockin...
3,Having an online bank as your primary bank is ...
4,I am using Revolt for traveling and paying for...
...,...
59,"You are right, I was thinking of the Flexible ..."
60,You can get that exact document as pdf in the ...
61,I had my traditional bank card blocked i was t...
62,"Yes, but some are oldfashioned and want either..."


# Scraping based on key terms

In [10]:
# 5. Generalized Search by Keyword
# --------------------------------
# Define search function by keyword and subreddit

# Authenticate Reddit API
reddit = praw.Reddit(
    client_id="....",  
    client_secret="...",  
    user_agent="....",  
)

In [11]:
# 6. Apply Scraper to Each App
# ----------------------------
# Fetch and save comments related to Revolut
keyword = "Revolut"
subreddit = "Netherlands"
limit_posts = 1500  


In [12]:
# Store results
all_comments = []

# Search threads matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)
    for comment in submission.comments.list():
        all_comments.append({
            "post_title": submission.title,
            "comment": comment.body,
            "score": comment.score,
            "author": str(comment.author),
            "permalink": f"https://www.reddit.com{comment.permalink}"
        })

# Convert to DataFrame
df = pd.DataFrame(all_comments)

# Show the results
print(f"Found {len(df)} comments across {limit_posts} posts.")
display(df.head(10))




Found 6423 comments across 1500 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,Revolut in the Netherlands,As many other banks that claim to be neo-banks...,113,alphache,https://www.reddit.com/r/Netherlands/comments/...
1,Revolut in the Netherlands,Yeah would also not advice to go all in on Rev...,39,Dennisje182,https://www.reddit.com/r/Netherlands/comments/...
2,Revolut in the Netherlands,While I never had problem with revolut blockin...,11,code17220,https://www.reddit.com/r/Netherlands/comments/...
3,Revolut in the Netherlands,Having an online bank as your primary bank is ...,31,smdrdit,https://www.reddit.com/r/Netherlands/comments/...
4,Revolut in the Netherlands,I am using Revolt for traveling and paying for...,6,chia0tzu,https://www.reddit.com/r/Netherlands/comments/...
5,Revolut in the Netherlands,They have a Dutch IBAN now? That's pretty neat...,4,GodBjorn,https://www.reddit.com/r/Netherlands/comments/...
6,Revolut in the Netherlands,I’ve been living in the Netherlands for 3 year...,3,Chernobie,https://www.reddit.com/r/Netherlands/comments/...
7,Revolut in the Netherlands,Using Revolut for >6 years now (for traveling ...,2,philthuene,https://www.reddit.com/r/Netherlands/comments/...
8,Revolut in the Netherlands,Does Revolut work at places that only accept d...,2,bentrider,https://www.reddit.com/r/Netherlands/comments/...
9,Revolut in the Netherlands,"You do you, but I call you crazy for even cons...",1,KingOfCotadiellu,https://www.reddit.com/r/Netherlands/comments/...


In [13]:
# Convert to DataFrame
df = pd.DataFrame(all_comments)

# Show the results
print(f"Found {len(df)} comments across {limit_posts} posts.")
display(df.head(10))

Found 6423 comments across 1500 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,Revolut in the Netherlands,As many other banks that claim to be neo-banks...,113,alphache,https://www.reddit.com/r/Netherlands/comments/...
1,Revolut in the Netherlands,Yeah would also not advice to go all in on Rev...,39,Dennisje182,https://www.reddit.com/r/Netherlands/comments/...
2,Revolut in the Netherlands,While I never had problem with revolut blockin...,11,code17220,https://www.reddit.com/r/Netherlands/comments/...
3,Revolut in the Netherlands,Having an online bank as your primary bank is ...,31,smdrdit,https://www.reddit.com/r/Netherlands/comments/...
4,Revolut in the Netherlands,I am using Revolt for traveling and paying for...,6,chia0tzu,https://www.reddit.com/r/Netherlands/comments/...
5,Revolut in the Netherlands,They have a Dutch IBAN now? That's pretty neat...,4,GodBjorn,https://www.reddit.com/r/Netherlands/comments/...
6,Revolut in the Netherlands,I’ve been living in the Netherlands for 3 year...,3,Chernobie,https://www.reddit.com/r/Netherlands/comments/...
7,Revolut in the Netherlands,Using Revolut for >6 years now (for traveling ...,2,philthuene,https://www.reddit.com/r/Netherlands/comments/...
8,Revolut in the Netherlands,Does Revolut work at places that only accept d...,2,bentrider,https://www.reddit.com/r/Netherlands/comments/...
9,Revolut in the Netherlands,"You do you, but I call you crazy for even cons...",1,KingOfCotadiellu,https://www.reddit.com/r/Netherlands/comments/...


In [14]:
#save data
output_path = os.path.join("..", "Public Data", "Revolut_Reddit_reviews.xlsx")
df.to_excel(output_path, index=False)


OSError: Cannot save file into a non-existent directory: 'C:\Users\nklom\OneDrive\Pictures\Documents\Master\Thesis\Data'

In [13]:
# Fetch and save comments related to Revolut
keyword = "Revolut"
subreddit = "nederlands"
limit_posts = 1000 

In [14]:
# Store results
all_comments = []

# Search threads matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)
    for comment in submission.comments.list():
        all_comments.append({
            "post_title": submission.title,
            "comment": comment.body,
            "score": comment.score,
            "author": str(comment.author),
            "permalink": f"https://www.reddit.com{comment.permalink}"
        })

# Convert to DataFrame
df_revn = pd.DataFrame(all_comments)

# Show the results
print(f"Found {len(df_revn)} comments across {limit_posts} posts.")
display(df_revn.head(10))



💬 Found 1338 comments across 1500 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,Waar op letten met openen rekening en heeft ie...,Bunq is vaak in het nieuws met slechte ervarin...,23,Dense_Jury5588,https://www.reddit.com/r/nederlands/comments/1...
1,Waar op letten met openen rekening en heeft ie...,Bunq heeft/had zijn beveiliging niet helemaal ...,17,Jaeger__85,https://www.reddit.com/r/nederlands/comments/1...
2,Waar op letten met openen rekening en heeft ie...,Specifieke reden dat je niet bij een Nederland...,6,FantasticPenguin,https://www.reddit.com/r/nederlands/comments/1...
3,Waar op letten met openen rekening en heeft ie...,"Bunq heb ik geen ervaringen mee, Revolut wel. ...",3,MikeThePenguin__,https://www.reddit.com/r/nederlands/comments/1...
4,Waar op letten met openen rekening en heeft ie...,"Ik heb al jaren een Revolut rekening, zelfs to...",3,ShelLuser42,https://www.reddit.com/r/nederlands/comments/1...
5,Waar op letten met openen rekening en heeft ie...,De trage klantenservice van bunq zou ik vermij...,2,DhoTjai,https://www.reddit.com/r/nederlands/comments/1...
6,Waar op letten met openen rekening en heeft ie...,"Bunq heeft veel, heel veel problemen. Do not g...",3,m71nu,https://www.reddit.com/r/nederlands/comments/1...
7,Waar op letten met openen rekening en heeft ie...,Waar je op moet letten hangt echt af van waar ...,1,Neither-Grade6397,https://www.reddit.com/r/nederlands/comments/1...
8,Waar op letten met openen rekening en heeft ie...,Ik heb al 8 jaar mijn rekeningen bij KNAB en b...,1,TylerPerry19inch,https://www.reddit.com/r/nederlands/comments/1...
9,Waar op letten met openen rekening en heeft ie...,is service bij fintech net zo betrouwbaar? nee.,1,trick2011,https://www.reddit.com/r/nederlands/comments/1...


In [15]:
#save data
output_path = os.path.join("..", "Public Data", "Revn_Reddit_reviews.xlsx")
df_revn.to_excel(output_path, index=False)

BUNQ

In [18]:
# Fetch and save comments related to Bunq
keyword = "bunq"
subreddit = "Netherlands"
limit_posts = 1500  


In [19]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_bunq = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_bunq)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_bunq.head(10))

💬 Found 524 comments mentioning 'bunq' across 1500 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,Good bunq alternative?,Revolut ticks all those boxes but it doesn’t p...,15,Unlucky_Quote6394,https://www.reddit.com/r/Netherlands/comments/...
1,Good bunq alternative?,Revolut is a pretty good free alternative. Has...,15,NastroAzzurro,https://www.reddit.com/r/Netherlands/comments/...
2,Good bunq alternative?,I switched banks as well baceuase of the horro...,2,PlantAndMetal,https://www.reddit.com/r/Netherlands/comments/...
3,Good bunq alternative?,I liked the idea of those new banks but found ...,2,hsifuevwivd,https://www.reddit.com/r/Netherlands/comments/...
4,Good bunq alternative?,Have you considered 2 accounts? A Revolut whic...,1,Able-Net5184,https://www.reddit.com/r/Netherlands/comments/...
5,Good bunq alternative?,Could you share what is wrong with bunq?,0,Simple-Preference-54,https://www.reddit.com/r/Netherlands/comments/...
6,Good bunq alternative?,Employees have browsed through financial detai...,15,BEERsandBURGERs,https://www.reddit.com/r/Netherlands/comments/...
7,Good bunq alternative?,From what I've read in the news (I am not a Bu...,9,netherlandsftw,https://www.reddit.com/r/Netherlands/comments/...
8,Good bunq alternative?,"See r/bunq but keep it mind, happy customers m...",2,hi-bb_tokens-bb,https://www.reddit.com/r/Netherlands/comments/...
9,Good bunq alternative?,Where the pockets don’t work for me is in that...,3,Unlucky_Quote6394,https://www.reddit.com/r/Netherlands/comments/...


In [23]:
#save data
output_path = os.path.join("..", "Public Data", "Bunq_Reddit_reviews.xlsx")
df_bunq.to_excel(output_path, index=False)

In [19]:
# Fetch and save comments related to Bunq
keyword = "bunq"
subreddit = "nederlands" 
limit_posts = 1500

In [20]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_bunq_n = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_bunq_n)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_bunq_n.head(10))

💬 Found 78 comments mentioning 'bunq' across 1500 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,“Bunq bank”,Bunq xdd,-1,Delicious-Shirt7188,https://www.reddit.com/r/nederlands/comments/1...
1,“Bunq bank”,Als je bereid bent bij bunq lange tijd te wach...,72,Dry_Capital_9119,https://www.reddit.com/r/nederlands/comments/1...
2,“Bunq bank”,Lol Bunq heeft niet eens een telefonische help...,7,Gloomy_Landscape8002,https://www.reddit.com/r/nederlands/comments/1...
3,“Bunq bank”,Elke bank heeft hier speciale email adressen e...,4,wildlyoffensiveusern,https://www.reddit.com/r/nederlands/comments/1...
4,“Bunq bank”,"Eh, de rentes bij de ECB zijn stukken hoger, e...",7,robertjan88,https://www.reddit.com/r/nederlands/comments/1...
5,“Bunq bank”,Bunq maakte pas afgelopen maand winst in 10 ja...,8,closetBoi04,https://www.reddit.com/r/nederlands/comments/1...
6,Waar op letten met openen rekening en heeft ie...,Bunq is vaak in het nieuws met slechte ervarin...,24,Dense_Jury5588,https://www.reddit.com/r/nederlands/comments/1...
7,Waar op letten met openen rekening en heeft ie...,Bunq heeft/had zijn beveiliging niet helemaal ...,16,Jaeger__85,https://www.reddit.com/r/nederlands/comments/1...
8,Waar op letten met openen rekening en heeft ie...,"Bunq heb ik geen ervaringen mee, Revolut wel. ...",3,MikeThePenguin__,https://www.reddit.com/r/nederlands/comments/1...
9,Waar op letten met openen rekening en heeft ie...,De trage klantenservice van bunq zou ik vermij...,2,DhoTjai,https://www.reddit.com/r/nederlands/comments/1...


In [21]:
#save data
output_path = os.path.join("..", "Public Data", "Bunq_n_Reddit_reviews.xlsx")
df_bunq_n.to_excel(output_path, index=False)

N26

In [20]:
# Fetch and save comments related to N26
keyword = "N26"
subreddit = "Netherlands"
limit_posts = 1000 

In [21]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_N26 = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_N26)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_N26.head(10))

💬 Found 126 comments mentioning 'N26' across 1000 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,When catastrophe strikes: iDeal & PayPal not s...,There are some online credit cards that are pe...,2,Pacpav,https://www.reddit.com/r/Netherlands/comments/...
1,When catastrophe strikes: iDeal & PayPal not s...,"I use N26, they offer a free debit card from M...",3,faabmaster,https://www.reddit.com/r/Netherlands/comments/...
2,When catastrophe strikes: iDeal & PayPal not s...,N26 :),1,GewoonSimon,https://www.reddit.com/r/Netherlands/comments/...
3,When catastrophe strikes: iDeal & PayPal not s...,N26 is the way to go,1,,https://www.reddit.com/r/Netherlands/comments/...
4,When catastrophe strikes: iDeal & PayPal not s...,n26 prepaid credit card,1,theredVL,https://www.reddit.com/r/Netherlands/comments/...
5,When catastrophe strikes: iDeal & PayPal not s...,"Either get a credit card from your bank, or si...",92,DaHaunter,https://www.reddit.com/r/Netherlands/comments/...
6,When catastrophe strikes: iDeal & PayPal not s...,"i use n26 myself, never really had an issue so...",2,Useful-Position-4445,https://www.reddit.com/r/Netherlands/comments/...
7,When catastrophe strikes: iDeal & PayPal not s...,I use N26. Only issue I have is that transferr...,2,MicrochippedByGates,https://www.reddit.com/r/Netherlands/comments/...
8,When catastrophe strikes: iDeal & PayPal not s...,N26 is better imo,1,MyspaceTime,https://www.reddit.com/r/Netherlands/comments/...
9,When catastrophe strikes: iDeal & PayPal not s...,Some banks and card issuers now offer “credit ...,2,JH_X,https://www.reddit.com/r/Netherlands/comments/...


In [24]:
#save data
output_path = os.path.join("..", "Public Data", "N26_Reddit_reviews.xlsx")
df_N26.to_excel(output_path, index=False)

In [22]:
# Fetch and save comments related to N26
keyword = "N26"
subreddit = "nederlands"
limit_posts = 1000  

In [23]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_N26_n = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_N26_n)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_N26_n.head(10))

💬 Found 31 comments mentioning 'N26' across 1000 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,De creditcard: wanneer je alleen maar hiermee ...,"N26, digitaal en gratis. Je pleurt het aankoop...",5,b4ttleduck,https://www.reddit.com/r/nederlands/comments/1...
1,De creditcard: wanneer je alleen maar hiermee ...,Neem een debit card via bijvoorbeeld N26.,6,Diederik-NL,https://www.reddit.com/r/nederlands/comments/1...
2,De creditcard: wanneer je alleen maar hiermee ...,Heel vaak als je in zo'n betaalscherm kunt kie...,1,TheHazardOfLife,https://www.reddit.com/r/nederlands/comments/1...
3,De creditcard: wanneer je alleen maar hiermee ...,Revolut en N26 bieden gratis debit cards aan d...,1,Interesting-Loan507,https://www.reddit.com/r/nederlands/comments/1...
4,De creditcard: wanneer je alleen maar hiermee ...,Gratis accountje bij N26 of Revolut?,1,Square_Dimension6763,https://www.reddit.com/r/nederlands/comments/1...
5,De creditcard: wanneer je alleen maar hiermee ...,"N26 gratis prepaid creditcard genomen, kost niks",1,goanywhere-hdk,https://www.reddit.com/r/nederlands/comments/1...
6,De creditcard: wanneer je alleen maar hiermee ...,N26 gratis bankrekening in Duitsland met grati...,1,Langkampo,https://www.reddit.com/r/nederlands/comments/1...
7,De creditcard: wanneer je alleen maar hiermee ...,"Ikzelf heb een account bij N26, dat is een Dui...",1,Vallaquenta,https://www.reddit.com/r/nederlands/comments/1...
8,De creditcard: wanneer je alleen maar hiermee ...,Je kan gratis een N26 debit card aanvragen dat...,1,ActHead,https://www.reddit.com/r/nederlands/comments/1...
9,De creditcard: wanneer je alleen maar hiermee ...,N26,1,UregMazino,https://www.reddit.com/r/nederlands/comments/1...


In [24]:
#save data
output_path = os.path.join("..", "Public Data", "N26_n_Reddit_reviews.xlsx")
df_N26_n.to_excel(output_path, index=False)

Knab

In [4]:
# Fetch and save comments related to Knab
keyword = "knab"
subreddit = "Netherlands"
limit_posts = 1000  

In [5]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_knab = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_knab)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_knab.head(10))

💬 Found 17 comments mentioning 'knab' across 1000 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,Have you received the new debit cards?,Got one from Knab and revolut,7,yorde,https://www.reddit.com/r/Netherlands/comments/...
1,Have you received the new debit cards?,Got one from Knab. It works almost everywhere,1,Yvorontsov,https://www.reddit.com/r/Netherlands/comments/...
2,Have you received the new debit cards?,I have ABN an Knab but the fee for K is around...,1,Affectionate_Set_962,https://www.reddit.com/r/Netherlands/comments/...
3,Do I need a Dutch bank account?,KNAB,1,Yvorontsov,https://www.reddit.com/r/Netherlands/comments/...
4,Good bunq alternative?,I think Knab has all those features as well.,3,BrainNSFW,https://www.reddit.com/r/Netherlands/comments/...
5,best (online) banks for the netherlands?,ABN or Knab,1,steigerbouwer,https://www.reddit.com/r/Netherlands/comments/...
6,Bank account?,"To be honest, reviews for banks are a bad meas...",2,DonPinda,https://www.reddit.com/r/Netherlands/comments/...
7,Bank account?,KNAB all the way,1,nixonneveld,https://www.reddit.com/r/Netherlands/comments/...
8,Business bank account as non-permanent resident,"I'm just on a student visa, so I think this ma...",1,a_seal2000,https://www.reddit.com/r/Netherlands/comments/...
9,Bunq versus Revolut,Knab is a much beter option…,1,DrSteffer,https://www.reddit.com/r/Netherlands/comments/...


In [25]:
#save data
output_path = os.path.join("..", "Public Data", "knab_Reddit_reviews.xlsx")
df_knab.to_excel(output_path, index=False)

In [26]:
# Fetch and save comments related to Knab
keyword = "knab"
subreddit = "nederlands"
limit_posts = 1000 

In [27]:
# Store results
all_comments = []

# Search for posts matching the keyword
for submission in reddit.subreddit(subreddit).search(keyword, limit=limit_posts):
    submission.comments.replace_more(limit=None)  # Load all comments
    for comment in submission.comments.list():
        # Check if keyword is in the comment (case-insensitive)
        if keyword.lower() in comment.body.lower():
            all_comments.append({
                "post_title": submission.title,
                "comment": comment.body,
                "score": comment.score,
                "author": str(comment.author),
                "permalink": f"https://www.reddit.com{comment.permalink}"
            })

# Convert to DataFrame
df_knab_n = pd.DataFrame(all_comments)

# Show summary and preview
print(f"Found {len(df_knab_n)} comments mentioning '{keyword}' across {limit_posts} posts.")
display(df_knab_n.head(10))

💬 Found 8 comments mentioning 'knab' across 1000 posts.


Unnamed: 0,post_title,comment,score,author,permalink
0,"Problemen met KNAB pinpas, wie ook?",Kun je bij KNAB niet gewoon met je telefoon be...,1,Bonepickle,https://www.reddit.com/r/nederlands/comments/1...
1,Nieuwe pinpas knab,Volgens dit artikel is dat niet zo: https://ww...,1,,https://www.reddit.com/r/nederlands/comments/1...
2,Kan je twee studentenrekeningen openen?,Bij Knab betaal je een vast bedrag voor een on...,1,ZoneProfessional8202,https://www.reddit.com/r/nederlands/comments/1...
3,Beste bank voor gezamenlijke rekening?,Ik heb Knab. Erg voordelig. Je betaalt maar vo...,1,ZoneProfessional8202,https://www.reddit.com/r/nederlands/comments/1...
4,Beste bank voor gezamenlijke rekening?,"Ik zou het bij 1 van de 4 grote banken houden,...",1,chartnoob,https://www.reddit.com/r/nederlands/comments/1...
5,Beste bank voor gezamenlijke rekening?,"Wij zijn ook voor Knab gegaan, prima service e...",1,Velum_In_1716,https://www.reddit.com/r/nederlands/comments/1...
6,Overstappen van bank,Ik heb zelf best goede ervaringen gehad met kn...,2,MikeWazowski2-2-2,https://www.reddit.com/r/nederlands/comments/1...
7,Bankrekening VVE,"Knab is ook wel prima, en goedkoop!",1,hetqtje,https://www.reddit.com/r/nederlands/comments/1...


In [28]:
#save data
output_path = os.path.join("..", "Public Data", "knab_n_Reddit_reviews.xlsx")
df_knab_n.to_excel(output_path, index=False)

# Merge Data 

In [25]:
# 7. Merge and Clean Data
# -----------------------
# Load previously saved Excel files

# Base file path
base_path = r"...\Thesis\Data"

# Step 1: Load individual datasets
n26_reddit = pd.read_excel(f"{base_path}\\N26_Reddit_reviews.xlsx")
bunq_reddit = pd.read_excel(f"{base_path}\\Bunq_Reddit_reviews.xlsx")
revolut_reddit = pd.read_excel(f"{base_path}\\Revolut_Reddit_reviews.xlsx")

# Step 2: Label each dataset with corresponding app
n26_reddit['app_name'] = 'N26'
bunq_reddit['app_name'] = 'Bunq'
revolut_reddit['app_name'] = 'Revolut'

# Step 3: Ensure consistent columns across all files
common_columns = ['post_title', 'comment', 'score', 'author', 'permalink', 'app_name']

# drop duplicates
for df in [n26_reddit, bunq_reddit, revolut_reddit]:
    df.drop_duplicates(subset=['comment', 'author', 'permalink'], inplace=True)

# Filter to common columns
n26_reddit = n26_reddit[common_columns]
bunq_reddit = bunq_reddit[common_columns]
revolut_reddit = revolut_reddit[common_columns]

# Step 4: Merge all Reddit data
merged_reddit_reviews = pd.concat([n26_reddit, bunq_reddit, revolut_reddit], ignore_index=True)


In [27]:
display(merged_reddit_reviews)

Unnamed: 0,post_title,comment,score,author,permalink,app_name
0,When catastrophe strikes: iDeal & PayPal not s...,There are some online credit cards that are pe...,2,Pacpav,https://www.reddit.com/r/Netherlands/comments/...,N26
1,When catastrophe strikes: iDeal & PayPal not s...,"I use N26, they offer a free debit card from M...",3,faabmaster,https://www.reddit.com/r/Netherlands/comments/...,N26
2,When catastrophe strikes: iDeal & PayPal not s...,N26 :),1,GewoonSimon,https://www.reddit.com/r/Netherlands/comments/...,N26
3,When catastrophe strikes: iDeal & PayPal not s...,N26 is the way to go,1,,https://www.reddit.com/r/Netherlands/comments/...,N26
4,When catastrophe strikes: iDeal & PayPal not s...,n26 prepaid credit card,1,theredVL,https://www.reddit.com/r/Netherlands/comments/...,N26
...,...,...,...,...,...,...
7278,Transferring funds to America,I use US Forex to transfer funds from the US t...,2,aliblue225,https://www.reddit.com/r/Netherlands/comments/...,Revolut
7279,Transferring funds to America,I’ve used transferwise for the reverse directi...,2,purple_pandaface,https://www.reddit.com/r/Netherlands/comments/...,Revolut
7280,Transferring funds to America,I looked into this briefly. Try PayPal.,1,TheStoffer,https://www.reddit.com/r/Netherlands/comments/...,Revolut
7281,Transferring funds to America,[deleted],3,,https://www.reddit.com/r/Netherlands/comments/...,Revolut


In [28]:
# Step 5: Save to Excel
merged_reddit_reviews.to_excel(f"{base_path}\\Merged_Reddit_Reviews.xlsx", index=False)


In [32]:
# Base file path
base_path = os.path.join("..", "data")

# Step 1: Load the Reddit Excel files
n26_reddit = pd.read_excel(f"{base_path}\\N26_n_Reddit_reviews.xlsx")
bunq_reddit = pd.read_excel(f"{base_path}\\Bunq_n_Reddit_reviews.xlsx")
revolut_reddit = pd.read_excel(f"{base_path}\\Revn_Reddit_reviews.xlsx")
knab_reddit = pd.read_excel(f"{base_path}\\knab_Reddit_reviews.xlsx")
knab_n_reddit = pd.read_excel(f"{base_path}\\knab_n_Reddit_reviews.xlsx")

# Step 2: Add app_name column
n26_reddit['app_name'] = 'N26'
bunq_reddit['app_name'] = 'Bunq'
revolut_reddit['app_name'] = 'Revolut'
knab_reddit['app_name'] = 'Knab'
knab_n_reddit['app_name'] = 'Knab'

# Step 3: Ensure consistent columns across all files
common_columns = ['post_title', 'comment', 'score', 'author', 'permalink', 'app_name']

# drop duplicates
for df in [n26_reddit, bunq_reddit, revolut_reddit, knab_reddit, knab_n_reddit]:
    df.drop_duplicates(subset=['comment', 'author', 'permalink'], inplace=True)

# Filter to common columns
n26_reddit = n26_reddit[common_columns]
bunq_reddit = bunq_reddit[common_columns]
revolut_reddit = revolut_reddit[common_columns]
knab_reddit = knab_reddit[common_columns]
knab_n_reddit = knab_n_reddit[common_columns]

# Step 4: Merge all Reddit data 
merged_extra_reddit_reviews = pd.concat(
    [n26_reddit, bunq_reddit, revolut_reddit, knab_reddit, knab_n_reddit],
    ignore_index=True
)

# Preview merged data
print(merged_extra_reddit_reviews.head())
print(f"\nTotal reviews combined: {len(merged_extra_reddit_reviews)}")



                                          post_title  \
0  De creditcard: wanneer je alleen maar hiermee ...   
1  De creditcard: wanneer je alleen maar hiermee ...   
2  De creditcard: wanneer je alleen maar hiermee ...   
3  De creditcard: wanneer je alleen maar hiermee ...   
4  De creditcard: wanneer je alleen maar hiermee ...   

                                             comment  score  \
0  N26, digitaal en gratis. Je pleurt het aankoop...      5   
1          Neem een debit card via bijvoorbeeld N26.      6   
2  Heel vaak als je in zo'n betaalscherm kunt kie...      1   
3  Revolut en N26 bieden gratis debit cards aan d...      1   
4               Gratis accountje bij N26 of Revolut?      1   

                 author                                          permalink  \
0            b4ttleduck  https://www.reddit.com/r/nederlands/comments/1...   
1           Diederik-NL  https://www.reddit.com/r/nederlands/comments/1...   
2       TheHazardOfLife  https://www.reddit.com/r/

In [35]:
merged_extra_reddit_reviews.to_excel(f"{base_path}\\Merged_extra_Reddit_Reviews.xlsx", index=False)