In [820]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import ast
import string
import re
from deep_translator import GoogleTranslator
from sentence_transformers import SentenceTransformer
from openai import OpenAI
import tiktoken
from langchain.prompts import PromptTemplate
from sklearn.feature_extraction.text import CountVectorizer
from bertopic import BERTopic
from bertopic.representation import OpenAI
import openai
import tiktoken


# Tokenizer

sys.path.append(os.path.abspath(".."))
from src.funcs import remove_stopwords
from src.funcs import remove_punctuation
from src.funcs import unicode
from src.funcs import clean
from src.funcs import clean
from src.funcs import translate_by_division

### gpt labling

In [3]:
import os
os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY')

In [746]:
df = pd.read_csv('../data/processed/review_data_berlin_en.csv')

In [747]:
df.columns

Index(['Unnamed: 0', 'namenr', 'place_ids', 'lat', 'lon', 'geometry',
       'review_text', 'cleaned_text', 'review_text_english'],
      dtype='object')

In [748]:
cols = ['namenr', 'place_ids', 'lat', 'lon', 'geometry',
       'review_text']

In [749]:
df = df[cols]

In [750]:
df

Unnamed: 0,namenr,place_ids,lat,lon,geometry,review_text
0,Schlosspark%Charlottenburg,ChIJLQhWKi5RqEcRuLWq4wG9HDE,52.523866,13.292494,POINT (13.2924939 52.523866399999996),"['One of Berlin’s treasures, what i like about..."
1,Sophie-Charlotten-Platz%-%GA,ChIJsXdQFdhQqEcRnq4gT95JHW0,52.511170,13.296790,POINT (13.29679 52.51117),"[""Sophie-Charlotte-Platz metro station in Berl..."
2,Savignyplatz,ChIJCeEX0vxQqEcRVuhtE5dxUq0,52.506136,13.322457,POINT (13.322457199999999 52.506136399999995),['Beautiful little park. Wish people would not...
3,Spreebogen,ChIJoTVigqBRqEcRQHrs906Stfg,52.523778,13.347262,POINT (13.3472615 52.523778199999995),"[""This place is an absolute gem, a true meetin..."
4,Goslarer%Platz,ChIJaftL6j1RqEcRwKttfGRrD7k,52.525109,13.314311,POINT (13.3143107 52.525109099999995),['Not very clean. But ok. 4 benches. And somet...
...,...,...,...,...,...,...
278,Alt-Marienfelde%Dorfaue%-%Gartendenkmal,ChIJ7X1YEK5RqEcRGzWZvCwfSQk,52.485383,13.325869,POINT (13.3258692 52.485382900000005),"['Schlichter Stein, schön zum begutachten', 'S..."
279,Birkholz,ChIJG4J7vN1QqEcRvScjF5M646o,52.503858,13.303162,POINT (13.3031616 52.503858099999995),['Unsere geliebte Mutti ist nachdem sie im Hau...
280,Union,ChIJATQHP-JQqEcRYLqj1s5WG1k,52.508056,13.317497,POINT (13.317496499999999 52.5080558),['This bar was the highlight of our trip. The...
281,Ehem.%Flugfeld/%Naturerlebnisraum%Jo%GA,ChIJmzoVxehPqEcRB6YzEZWqGuM,52.482115,13.389191,POINT (13.3891911 52.482115099999994),['Excellent tour - I forget the guides name bu...


In [752]:
chunk1 = df[:100]
chunk2 = df[100:200]
chunk3 = df[200:]
chunk1['review_en'] = chunk1['review_text'].apply(lambda x: translate_by_division(x))
chunk2['review_en'] = chunk2['review_text'].apply(lambda x: translate_by_division(x))
chunk3['review_en'] = chunk3['review_text'].apply(lambda x: translate_by_division(x))

KeyboardInterrupt: 

In [None]:
df = pd.concat([chunk1, chunk2, chunk3],axis=0)

In [89]:
#df = pd.concat([chunk1, chunk2, chunk3], axis=0, ignore_index=False)

### Filter

In [730]:
key = os.environ.get('OPENAI_API_KEY')

In [101]:
prompt_template = PromptTemplate.from_template(
    '''
    Your task is to extract sentences from {sentence}.
    You are an assistant for labeling negative annotaded sentences in user reviews.
    Return the sentences that are associated to be negative in form of a list.
    Provide output without further text information.
    Use the following schema ['sentence 1', 'sentence 2', ...]
    '''
)

In [102]:
client = OpenAI()

In [104]:
def labeling(sentence: str) -> str:
    try:
        # Format the prompt dynamically with the input sentence
        prompt = prompt_template.format(sentence=sentence)
        
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {e}"

In [106]:
df['labeled_sentences'] = df['review_en'].apply(lambda x: labeling(x))

### topic modelling

In [760]:
df = pd.read_csv('../data/processed/bert_review_en_labeled.csv')

In [768]:
df = df[df.review_en.apply(lambda x: isinstance(x, str))]

In [769]:
all_sentences_with_index = []

for idx, row in df.review_en.items():
    # Clean the row
    cleaned_row = row.replace("[", "").replace("]", "").replace("\\n", "").strip()
    # Split sentences
    sentences = cleaned_row.split(", '")
    # Append each sentence with its original index
    for sentence in sentences:
        all_sentences_with_index.append((idx, sentence))


In [770]:
doc_look_up_df = pd.DataFrame(all_sentences_with_index)
cols=['orig_index', 'sentence']
doc_look_up_df.columns = cols

In [771]:
docs = list(doc_look_up_df.sentence)

In [794]:
doc_look_up_df

Unnamed: 0,orig_index,sentence
0,0,"'One of Berlin's treasures, what I like about ..."
1,0,This Christmas market is a magical experience ...
2,0,We visited Schlossgarten Charlottenburg on a w...
3,0,"Beautiful nature, absolutely!I was just expect..."
4,0,Nice place to spend time with your family or f...
...,...,...
13800,282,"They only accept cash, even for bigger amounts..."
13801,282,Cozy spot for small plate Vietnamese food at a...
13802,282,all the coffees and smoothies are overly sweet...
13803,282,Incredible!!! Best fried rice I have ever had....


#### topic model

In [1166]:
from bertopic.representation import KeyBERTInspired
from bertopic.representation import PartOfSpeech
from bertopic.representation import MaximalMarginalRelevance
#from sklearn.datasets import fetch_20newsgroups

In [1167]:
vectorizer_model = CountVectorizer(stop_words='english') #ngram_range=(1, 3)

In [1168]:
main_representation = KeyBERTInspired()

# Additional ways of representing a topic
aspect_model1 = PartOfSpeech("en_core_web_sm")
aspect_model2 = [KeyBERTInspired(top_n_words=30), MaximalMarginalRelevance(diversity=.5)]

# Add all models together to be run in a single `fit`
representation_model = {
   "Main": main_representation,
   "Aspect1":  aspect_model1,
   "Aspect2":  aspect_model2 
}

In [1169]:
topic_model = BERTopic(representation_model=representation_model,vectorizer_model=vectorizer_model, calculate_probabilities=True, nr_topics=50).fit(docs)

In [1170]:
topic_model.visualize_barchart(top_n_topics = 40, n_words = 20)

In [1184]:
topic_model.get_topic_info().head(10)

Unnamed: 0,Topic,Count,Name,Representation,Aspect1,Aspect2,Representative_Docs
0,-1,4533,-1_park_berlin_parks_visit,"[park, berlin, parks, visit, relaxing, nearby,...","[park, nice, place, beautiful, great, good, pe...","[park, berlin, visit, relaxing, nearby, river,...",[So beautiful and green I absolutely love just...
1,0,2943,0_park_nearby_playgrounds_playground,"[park, nearby, playgrounds, playground, promen...","[park, nice, place, playground, walk, small, b...","[park, nearby, playgrounds, promenade, garden,...","[Very nice park.', Very nice park', Very nice ..."
2,1,1187,1_berlin_berlins_brandenburg_park,"[berlin, berlins, brandenburg, park, monuments...","[zoo, park, animals, city, visit, place, build...","[berlins, brandenburg, park, attractions, zoo,...",[We were located 20 minutes walking distance f...
3,2,761,2_place_places_location_wonderful,"[place, places, location, wonderful, visit, gr...","[place, nice, atmosphere, great, city, beautif...","[place, visit, lovely, paradise, museum, cozy,...","[nice place' , Very nice place', Nice place']"
4,3,445,3_memorial_memorials_berlin_monument,"[memorial, memorials, berlin, monument, monume...","[memorial, soviet, war, cemetery, soldiers, mo...","[memorials, berlin, soviets, reichstag, commem...",[A very beautiful memorial to fallen soldiers....
5,4,409,4_station_stations_subway_railway,"[station, stations, subway, railway, trains, m...","[station, subway, train, parking, trains, u1, ...","[stations, subway, railway, metro, berlin, pas...","[It's just a subway station and nothing more',..."
6,5,404,5_good_excellent_great_positively,"[good, excellent, great, positively, wonderful...","[good, nice, cool, beautiful, great, impressiv...","[positively, liked, perfect, son, pretty, best...","[Good', Good', Good']"
7,6,317,6_building_buildings_architecture_architectural,"[building, buildings, architecture, architectu...","[church, building, architecture, tour, modern,...","[buildings, tower, cathedral, design, ruins, s...","[Beautiful building.', Very beautiful modern b..."
8,7,288,7_cleanliness_dirty_cleaning_clean,"[cleanliness, dirty, cleaning, clean, littered...","[clean, dirty, urine, rubbish, toilet, crowded...","[cleanliness, littered, dirt, toilets, rubbish...",[Nice but unfortunately a bit too dirty here.....
9,8,271,8_restaurant_meal_food_dishes,"[restaurant, meal, food, dishes, customers, di...","[food, service, staff, menu, friendly, unfrien...","[restaurant, dishes, customers, served, tasty,...","[Great food, delicious wine and excellent serv..."


In [1174]:
topics, probs = topic_model.fit_transform(docs)

In [1176]:
#filtered_prob = topic_model.reduce_outliers(docs, topics, probabilities=probs, strategy="probabilities")
new_topics = topic_model.reduce_outliers(docs, topics , strategy="c-tf-idf", threshold=0.1)

In [1187]:
topic_df = topic_model.get_document_info(docs)

In [1192]:
dirty_df = topic_df[topic_df['Topic'] == 7]

In [1186]:
topic_model.get_document_info(docs)['Topic'] == 7

TypeError: 'BERTopic' object is not subscriptable

In [1143]:
topic_model = BERTopic(calculate_probabilities=True, vectorizer_model=vectorizer_model)

In [1144]:
topics, probs = topic_model.fit_transform(docs)

In [1145]:
filtered_prob = topic_model.reduce_outliers(docs, topics, probabilities=probs, strategy="probabilities")
new_topics = filtered_prob.reduce_outliers(docs, topics , strategy="c-tf-idf", threshold=0.1)

AttributeError: 'list' object has no attribute 'reduce_outliers'

In [1193]:
#topic_model.visualize_barchart(top_n_topics = 20, n_words = 20)

#### topics eda

In [1147]:
topic_df = topic_model.get_topic_info()

In [1148]:
doc_df = topic_model.get_document_info(docs)

In [1157]:
from bertopic.representation import KeyBERTInspired
from bertopic.representation import PartOfSpeech
from bertopic.representation import MaximalMarginalRelevance
from sklearn.datasets import fetch_20newsgroups

main_representation = KeyBERTInspired()

# Additional ways of representing a topic
aspect_model1 = PartOfSpeech("en_core_web_sm")
aspect_model2 = [KeyBERTInspired(top_n_words=30), MaximalMarginalRelevance(diversity=.5)]

# Add all models together to be run in a single `fit`
representation_model = {
   "Main": main_representation,
   "Aspect1":  aspect_model1,
   "Aspect2":  aspect_model2 
}
topic_model = BERTopic(representation_model=representation_model).fit(docs)


In [1158]:
topic_model.visualize_barchart(top_n_topics = 40, n_words = 20)

In [1164]:
topic_model.get_topic_info().iloc[150:200]

Unnamed: 0,Topic,Count,Name,Representation,Aspect2,Representative_Docs
150,149,23,149_berlin_brandenburg_corner_prussian,"[berlin, brandenburg, corner, prussian, demoli...","[brandenburg, corner, demolished, graffiti, kr...","[Clean and quiet corner of Berlin', Clean and ..."
151,150,23,150_park_parkvery_parkour_place,"[park, parkvery, parkour, place, pleasant, are...","[park, area, residential, surroundings, staff,...","[Mostly a nice park. They are some weirdos, wa..."
152,151,23,151_waterfall_waterfalls_picturesque_waterfallit,"[waterfall, waterfalls, picturesque, waterfall...","[waterfalls, picturesque, monument, visit, flo...",[Nice view on the city! And the only one water...
153,152,23,152_love_loved__,"[love, loved, , , , , , , , ]","[love, loved, , , , , , , , , , , , , , , , , ...","[Love it', I love it', Love it']"
154,153,22,153_cathedral_church_dome_breathtaking,"[cathedral, church, dome, breathtaking, archit...","[cathedral, dome, breathtaking, stairs, devoti...",[As magnificent as it can be! This cathedral i...
155,154,22,154_steinplatz_kirchplatz_republik_oranienplatz,"[steinplatz, kirchplatz, republik, oranienplat...","[steinplatz, reichstag, berlins, square, landm...","[Nothing fancy, just a relaxing park', "" Neust..."
156,155,22,155_quiet_loud_pleasantly_clean,"[quiet, loud, pleasantly, clean, very, relaxin...","[quiet, loud, pleasantly, clean, very, relaxin...","[Quiet and nice', Nice and quiet.', Nice and q..."
157,156,22,156_garden_flowers_blossoms_visiting,"[garden, flowers, blossoms, visiting, floral, ...","[garden, flowers, visiting, retreat, tiergarte...",['A very nice retreat in the big park. It's an...
158,157,22,157_children_kids_kiddies_suitable,"[children, kids, kiddies, suitable, little, fo...","[children, small, helps, natural, mitte, areas...","[It is very good for children.', It is very go..."
159,158,22,158_water_piece_sea_houseboats,"[water, piece, sea, houseboats, part, cloudy, ...","[piece, sea, houseboats, cloudy, nice, fishing...","[Nice piece of water', Nice piece of water', N..."


In [716]:
topic_list = [1, 2, 8, 13, 15, 20, 24, 25, 38]

In [717]:
topic_df = topic_df[topic_df.Topic.isin(topic_list)]

In [718]:
dirty = [8,13,15, 20, 24, 25, 38]

In [719]:
dirty = doc_df[doc_df.Topic.isin(dirty)]

In [720]:
dirty['look_up'] = dirty.index

In [721]:
doc_look_up_df['look_up'] = doc_look_up_df.index

In [722]:
dirty_df= pd.merge(left=dirty, right=doc_look_up_df, on='look_up', how='inner')
dirty_df.head(4)

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document,look_up,orig_index,sentence
0,Platform 5 smells like dick dikkah',20,20_urine_smell_smells_smell urine,"[urine, smell, smells, smell urine, parking, s...",[Currently homeless people and junkies spend t...,urine - smell - smells - smell urine - parking...,0.455247,False,6,1,Platform 5 smells like dick dikkah'
1,"Dirty, not enough benches, lots of foreigners ...",13,13_benches_dirty benches_dirty_benches tipsy,"[benches, dirty benches, dirty, benches tipsy,...","[Dirty, not enough benches, lots of foreigners...",benches - dirty benches - dirty - benches tips...,0.054712,True,33,4,"Dirty, not enough benches, lots of foreigners ..."
2,Unfortunately not very clean Platz!',8,8_dirty_clean_dirty dirty_unfortunately,"[dirty, clean, dirty dirty, unfortunately, unf...","[Dirty'\n```, Dirty', Dirty']",dirty - clean - dirty dirty - unfortunately - ...,0.924909,False,35,4,Unfortunately not very clean Platz!'
3,the bushes are pretty dirty from questionable ...,13,13_benches_dirty benches_dirty_benches tipsy,"[benches, dirty benches, dirty, benches tipsy,...","[Dirty, not enough benches, lots of foreigners...",benches - dirty benches - dirty - benches tips...,0.027441,False,69,10,the bushes are pretty dirty from questionable ...


In [723]:
finaldirty_df = pd.merge(left=df, right=dirty_df, on='orig_index', how='inner')

In [724]:
cols = ['namenr', 'geometry', 'sentence', 'Topic']

In [727]:
finaldirty_df.namenr.value_counts().head(10)

namenr
Park%am%Hauptbahnhof                                       11
Spreeuferpromenade%zw.%Jannowitzbrücke%u.%Michaelbrücke     9
Annemirl-Bauer-Platz                                        8
Waldeckpark                                                 7
Ottopark                                                    7
Anita-Berber-Park                                           6
Falkplatz                                                   5
Hallesches%Ufer%zw.%Schöneberger%u.%Möckernbrücke           4
Böcklerpark                                                 4
Nordhafenpark%Ost                                           4
Name: count, dtype: int64

In [659]:
#df['orig_index'] = df['Unnamed: 0']

In [661]:
dirty_spaces['namenr'].value_counts().head(10)

namenr
Park%am%Hauptbahnhof                                       11
Spreeuferpromenade%zw.%Jannowitzbrücke%u.%Michaelbrücke     9
Annemirl-Bauer-Platz                                        8
Waldeckpark                                                 7
Ottopark                                                    7
Anita-Berber-Park                                           6
Falkplatz                                                   5
Hallesches%Ufer%zw.%Schöneberger%u.%Möckernbrücke           4
Böcklerpark                                                 4
Nordhafenpark%Ost                                           4
Name: count, dtype: int64

In [619]:
top_namenr = namenr_counts.head(10)

# Convert to a DataFrame for better readability
top_namenr_df = top_namenr.reset_index()
top_namenr_df.columns = ['namenr', 'count']

In [621]:
namenr_counts

namenr
Volkspark%am%Weinbergsweg             2
Fritz-Schloß-Park                     2
Sellerpark                            2
Park%an%der%Südpanke                  2
Platz%der%Republik                    2
                                     ..
Klosterkirche%Grünanlage              1
Luisenstädtischer%Kirchpark           1
Spreeuferpromenade%Märkisches%Ufer    1
Köllnischer%Park                      1
Eichbuschplatz%Bo%GA                  1
Name: count, Length: 273, dtype: int64

### testing berttopic prompting

In [956]:
prompt = prompt = """
I am looking for topics that can be found in  the following documents: 
[DOCUMENTS]
All documents are reviews of parks and green areas in Berlin.
Among all topics I am looking for one or more specific topics that can be described by the following keywords: [dirty, rubbish, smells, rats, unkept, homeless].
Based on the above information, extract a short label for the topic in the following format:
topic: <label>
"""

In [1135]:
prompt = """
I have a topic that is described by the following keywords: [dirty, dangerous, homeless, uncomfortable]

In this topic, the following documents are a small but representative subset of all documents in the topic:
- 'Not very clean. But ok. 4 benches. And sometimes dogs'
- 'A lot of dealers, drugs everwhere and dark corners with no lights'
- 'Some areas are occupied by homeless people who sleep in tents in the park and leave their rubbish lying around.'
- 'Creepy, much too loud and far too much traffic'

I will provide you a larger number of documents that contain more topics than the one desribed above.
Based on the above information, extract a short label for all topics in the following format:
topic: <label>
"""


In [1136]:
tokenizer= tiktoken.encoding_for_model("gpt-3.5-turbo")

In [1137]:
representation_model = OpenAI(
    client,
    model="gpt-3.5-turbo",
    delay_in_seconds=2,
    chat=True,
    nr_docs=4,
    doc_length=100,
    tokenizer=tokenizer,
    prompt=prompt 
)

In [1138]:
#vectorizer_model = CountVectorizer(ngram_range=(1, 3), stop_words='english')

In [1139]:
topic_model = BERTopic(representation_model=representation_model, nr_topics=20)

In [1140]:
topics, probs = topic_model.fit_transform(docs)

In [834]:
"""new_topics = topic_model.reduce_outliers(docs, topics, probabilities=probs, strategy="probabilities")
new_topics = topic_model.reduce_outliers(docs, topics , strategy="c-tf-idf", threshold=0.1)"""

In [1141]:
topic_df = topic_model.get_topic_info()
topic_df

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,4602,-1_Unsanitary and Unsafe Environment,[Unsanitary and Unsafe Environment],[It is a vibrant place to chill out and if you...
1,0,4415,0_Unsanitary and Unsafe Environment,[Unsanitary and Unsafe Environment],[A beautiful park that is very popular as a me...
2,1,2014,1_uncomfortable and dirty surroundings,[uncomfortable and dirty surroundings],[This was place I have enjoyed most in Berlin ...
3,2,795,2_unsanitary city environment,[unsanitary city environment],[I've just seen an interview with the new dual...
4,3,738,3_urban park issues,[urban park issues],"[It's OK', Super nice', Super nice']"
5,4,378,4_urban environment issues,[urban environment issues],"[An old Berlin subway station, which was histo..."
6,5,344,"5_dirty, dangerous, homeless, uncomfortable","[dirty, dangerous, homeless, uncomfortable]",['Paseo muy agradable a orillas del río Spree....
7,6,126,6_Given the described subset of documents for ...,[Given the described subset of documents for t...,"[Great', None, None, None, None, None, None, N..."
8,7,109,"7_[dirty, dangerous, homeless, uncomfortable]","[[dirty, dangerous, homeless, uncomfortable]]","[Kp', 😀 you jk', 😀 you jk']"
9,8,75,8_urban environment issues,[urban environment issues],"[Chill place', Chill', Chill']"


In [1130]:
topic_df.

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
241,240,13,240_Mixed Feelings Park,[Mixed Feelings Park],"[Very good connection to the subway', Very goo..."
242,241,13,241_Park with Mixed Experience,[Park with Mixed Experience],"[Nice Park', ""One of Berlin's hidden gems"", On..."
243,242,13,242_Urban Park Characteristics,[Urban Park Characteristics],"[A wonderful place', Wonderful place', Wonder..."
244,243,13,243_Urban Park Characteristics,[Urban Park Characteristics],[Beautiful route for beginners with roller ska...
245,244,13,244_Urban Park Characteristics,[Urban Park Characteristics],"[Wound erable place', None, None, None, None, ..."
246,245,13,245_Public Park Characteristics,[Public Park Characteristics],[Nice open space. Lots of places for kids and ...
247,246,13,246_Park with mixed reviews,[Park with mixed reviews],[It was wonderful to walk along the waterfront...
248,247,12,247_Urban Park Description,[Urban Park Description],"[Bearable', Bearable', Bearable']"
249,248,12,248_Mixed Feelings Park,[Mixed Feelings Park],"['Very peaceful place .', Peaceful place!', Pe..."
250,249,12,249_mixed atmosphere park,[mixed atmosphere park],"[Worth a trip when the weather is nice.', None..."


In [1119]:
filtered_df_2.Document.iloc[120]

"Nice new playground and mostly nice people. Unfortunately no public toilet in the area.'"

In [1055]:
topic_df = topic_model.get_topic_info()

In [1078]:
filtered_df.Document.iloc[7]

"Nothing special here.'"

In [970]:
doc_df = topic_model.get_document_info(docs)

In [978]:
filtered_df_3=doc_df[doc_df['Topic']==6]

In [954]:
filtered_df_2 = doc_df[doc_df['Topic']==4]

In [982]:
len(filtered_df_2)

303

In [983]:
#check = [9, 40, 36, 34,28]

In [910]:
filtered_df = doc_df[doc_df['Topic'].isin(check)]

In [1016]:
len(filtered_df_2.index)

303

In [1026]:
filtered_df['orig_index'] = filtered_df.index

In [1027]:
doc_look_up_df['map'] = doc_look_up_df.index

In [1028]:
filtered_df['map'] = filtered_df.index

In [1029]:
merged_df=pd.merge(left=doc_look_up_df, right=filtered_df, on='map', how='inner')

In [1030]:
merged_df['orig_index'] = merged_df['orig_index_x']

In [1031]:
df['orig_index'] = df.index

In [1032]:
final_df = pd.merge(left=df, right=merged_df, on='orig_index', how='inner')

In [1033]:
final_df.namenr.value_counts().head(15)

namenr
Park%am%Hauptbahnhof                                       21
Ottopark                                                   11
Nordhafenpark%Ost                                           8
Hertzbergplatz-Grünanlage                                   7
Annemirl-Bauer-Platz                                        7
Spreeuferpromenade%zw.%Jannowitzbrücke%u.%Michaelbrücke     6
Besselpark                                                  6
Puschkinallee/%Schlesischer%Busch%AT%GA                     5
Fischerinsel%11%an%der%Schwimmhalle                         5
Hallesches%Ufer%zw.%Schöneberger%u.%Möckernbrücke           5
Sellerpark                                                  5
Blankensteinpark                                            4
Platz%der%Republik                                          4
Karl-Marx-Allee%70C-F                                       4
Görlitzer%Park                                              4
Name: count, dtype: int64

In [1034]:
'Waldeckpark', 'Annemirl-Bauer-Platz', 'Hertzbergplatz-Grünanlage', 'Ottopark',                                

('Waldeckpark',
 'Annemirl-Bauer-Platz',
 'Hertzbergplatz-Grünanlage',
 'Ottopark')