In [5]:
#Import Libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.style.use('seaborn')
import seaborn as sns

pd.options.mode.chained_assignment = None  
pd.set_option('display.max_columns',100)  # or 1000
pd.set_option('display.max_rows', 100)  # or 1000

from sklearn import preprocessing

import warnings
warnings.filterwarnings('ignore')

#NLP
from transformers import pipeline

model_name = "distilbert-base-cased-distilled-squad"

In [6]:
# a) Get predictions
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
QA_input = {
    'question': 'what is the name?',
    'context': 'Name of the user is Ege'
}
res = nlp(QA_input)

print(res)

Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

{'score': 0.982586145401001, 'start': 20, 'end': 23, 'answer': 'Ege'}


In [59]:
#Import dataset
df_raw = pd.read_csv('listings.csv')

#Checking Shape
print(f'{df_raw.shape[0]} examples, {df_raw.shape[1]} features')

# ... and duplicates
print("It contains {} duplicates.".format(df_raw.duplicated().sum()))


24551 examples, 106 features
It contains 0 duplicates.


In [60]:
df = df_raw.head(100)

In [87]:
def predict_size(df,column):
    
    square_meters = []
    question = 'What is the size of the room/ in square meters (m2 or sqm) ?'
    counter = 0
    QA_input = {
        'question': question,
        'context': 'context will come here.'
    }

    for context in df[column]:

        if pd.isnull(context):
            square_meters.append({'answer':'unknown'})

        #Update QA_input's context:
        else:
            QA_input['context'] = context

            res = nlp(QA_input)
            square_meters.append(res)
            
    return square_meters

    
    

In [92]:
df['text'] = df['summary'].astype(str) + ' ' + df['space'].astype(str) + ' ' + df['description'].astype(str)

df['text'][0]

arr3 = predict_size(df,'text')

In [93]:
df['text'][4]

'nan Stay in a stylish loft on the second floor and you have the apartment all to yourself. All ceilings are 4 meters high and the front has 4 huge windows. Enjoy a fully furnished apartment with wooden floors, kitchen, bathroom with shower and an (almost) always sunny balcony. Bright and quiet, although very central.  Details of loft: - 63 square meter - Loft is located in quiet courtyard in an totally renovate old factory building - 2 Rooms (Sleeping- & Living Room) plus Kitchen / Bathroom w/ shower / huge Balcony - Parquet floors - Huge balcony - King size box spring bed (1.60 x 2.00 m) - Miele Washer & Dryer, Dishwasher, Fridge, Nespresso & coffee maker - Super fast DSL/WiFi and Flat screen TV with digital cable; Blue Ray/ DVD-Player Stay in a stylish loft on the second floor and you have the apartment all to yourself. All ceilings are 4 meters high and the front has 4 huge windows. Enjoy a fully furnished apartment with wooden floors, kitchen, bathroom with shower and an (almost) 

In [None]:
arr3 = predict_size(df,'text')

In [94]:
for i in range(len(square_meters)):
    print(i,')',arr3[i]['answer'],' || Manual:', df['size'][i])

0 ) 28  || Manual: 28
1 ) 30 of 75  || Manual: 75
2 ) 68m2  || Manual: 68
3 ) 26  || Manual: 26
4 ) 63  || Manual: 63
5 ) 20 sqm  || Manual: 20
6 ) 165  || Manual: 780
7 ) Prenzlauer Berg  || Manual: nan
8 ) 55  || Manual: 55
9 ) 1,80 m x 2,20  || Manual: 80
10 ) Kottbusser Tor - full size loft bed  || Manual: 16
11 ) 100 qm  || Manual: nan
12 ) 1-Room-turn of the century  || Manual: nan
13 ) 90 x 200 cm  || Manual: nan
14 ) 40m2  || Manual: 70
15 ) 50  || Manual: 50
16 ) 150 sqm  || Manual: 150
17 ) 150 sqm  || Manual: 150
18 ) 84  || Manual: 84
19 ) south-facing upper ground floor  || Manual: nan
20 ) 25  || Manual: 25
21 ) 100m  || Manual: 100
22 ) 400  || Manual: 47
23 ) 12  || Manual: 100
24 ) small  || Manual: 10
25 ) 200square  || Manual: 200
26 ) 700 sq. ft  || Manual: 65
27 ) 120 sq.m  || Manual: 20
28 ) 3 months  || Manual: nan
29 ) 2  || Manual: nan
30 ) 200  || Manual: 310
31 ) 25qm  || Manual: nan
32 ) M4/M10  || Manual: nan
33 ) 1,60m  || Manual: 35
34 ) 77m2  || Manual: 

In [98]:
df['space'][98]

'The beautiful, sunny and newly renovated 76qm appartement with 2 rooms and elevator (3rd floor) is in the beautiful Naugarder Strasse Prenzlauer Berg, near from Alexanderplatz and Mitte. Easy access to Tram and S Bahn (5 minutes) . 12 minutes by tram to Alexanderplatz. The appartement has an elevator, is in the 3rd floor and very sunny and  wooden floor, Stuck at the ceiling  and the special thing is that it has wtwo big balconies, one to the west side the other one to the east side. That means that you have always sun at least at one of the two balconies (if the sun shines of course:-). The kitchen has enaugh room to sit with 4 or five or even more people and has an extra balcony. There is also a sleeping coach additionally in one room, where two people can sleep on, for visits. Also there is one table and chair if you have work to do at home. All the rooms are very light and the furniture is a mixture between antique and modern. Inclusive W-LAN, washing machine, laundry dryer, dish 

In [73]:
square_meters= predict_size(df,'description')
square_meters_2= predict_size(df,'space')



In [74]:
# extract numbers 
df['size'] = df['description'].str.extract('(\d{2,3}\s?[smSM])', expand=True)
df['size'] = df['size'].str.replace("\D", "")

In [75]:
for i in range(len(square_meters)):
    print(i,')','Description:', square_meters[i]['answer'], ' || Space:', square_meters_2[i]['answer'], ' || Manual:', df['size'][i])

0 ) Description: 28  || Space: 28  || Manual: 28
1 ) Description: 75  || Space: 30  || Manual: 75
2 ) Description: 68m2  || Space: 68m2  || Manual: 68
3 ) Description: 26  || Space: 26  || Manual: 26
4 ) Description: 63  || Space: 63  || Manual: 63
5 ) Description: 20 sqm  || Space: 20 sqm  || Manual: 20
6 ) Description: 165  || Space: 165  || Manual: 780
7 ) Description: upper second floor  || Space: 160x200cm  || Manual: nan
8 ) Description: 55  || Space: 55  || Manual: 55
9 ) Description: 1,80 m x 2,20 m  || Space: 1,00 m x 2,00 m  || Manual: 80
10 ) Description: Kottbusser Tor - full size loft bed (sleeps 2)  || Space: Kottbusser Tor - full size loft bed (sleeps 2)  || Manual: 16
11 ) Description: 100  || Space: 100 qm  || Manual: nan
12 ) Description: 1-Room-turn of the century  || Space: Ligne Roset and Pol74  || Manual: nan
13 ) Description: 160 x 200 cm  || Space: 160 x 200 cm  || Manual: nan
14 ) Description: 70m2  || Space: 70m2  || Manual: 70
15 ) Description: 50  || Space: 

TypeError: string indices must be integers

In [71]:
df['description'][38]

"Splendid newly renovated and refurbished 44 square meter (474 square feet) loft apartment in an historical original pre-war Berlin building! Located in the beautiful & historical Prenzlauer Berg on Schwedter Strasse, this apartment is available to people staying for two months or more to work or study - walking distance from tech hub 'The Factory' & GLS Language School. This splendid newly renovated and refurbished 44 square meter loft apartment in an historical original pre-war Berlin building which at the time was built and used as a chocolate factory is situated just off Kastanienallee and a stone's throw to Mauerpark (where the Wall once divided the city), home of Berlin's legendary Sunday flea market.  Schwedter Strasse is right in the centre of Prenzlauer Berg, next to Oderberger Strasse and Zionskirchplatz, amidst historical buildings and beautiful old architecture. The Wall used to run at the end of Schwedter Strasse and Bernauer Strasse, where one of the last remaining parts 

In [50]:
for context,answer in zip(df['space'],square_meters):
    print('context: ', context,'\n')
    print('Answer: ', answer['answer'], ', score:', answer['score'])
    print('\n____________________')

context:  The room is very large, private, cozy, bright, and quiet because it faces the tree-filled courtyard. You can see beautiful sunsets from the window and you also have a balcony. It's 28 m2 in a 100 m2 apartment near Mitte, Prenzlauer Berg, Wedding, Mauerpark, Kastanienallee and Oderbergerstrasse with the flea markets, clubs, cafes and bars. It's also very central, near the Ubahn 8 and 2 and various Sbahns, about 10 minutes from Alexanderplatz and 20 from Kreuzberg and Neuköln.  We have a great dog named Mila, so you'd have to like animals and not have allergies! 

Answer:  28 , score: 0.4605097770690918

____________________
context:  A+++ location! This „Einliegerwohnung“ is an extention of a larger apartment with a separate entrance, bathroom and kitchen. The door to the rest of the apartment is soundproof, hidden, locked and barely noticable (behind mirror in pictures). Your 30 sq meters are facing a quiet courtyard. This wood floored/high ceiling typical Berlin "Altbau" apa

In [99]:
predictions = predict_size(df,'space')

In [116]:
#contexts= []
#answer_texts= []
#answer_start= []

for i in range(40,len(df)):
    
    context = df['space'][i]
    
    answer = predictions[i]['answer']
    
    if answer == 'unknown':
        continue;
        
    start = predictions[i]['start']
    
    print(context)
    print('\n Answer: ', answer, 'start: ',start)
    inp = input()
    
    
    if inp == 'Y' or inp =='y':
        contexts.append(context)
        answer_texts.append(answer)
        answer_start.append(start)
        
    elif inp.isnumeric():
        
        answer = inp
        contexts.append(context)
        answer_texts.append(answer)
        answer_start.append(start)
    
    
    
    
    

Nicely furnished apartment in the heart of Berlin. The bedroom has a comfortable double bed and living room has a sofa bed. Here can one more person to sleep. The large kitchen offers beautiful and comfortable enjoy cooking and a large table where you can spend together and eat nice time together. The bathroom has a bath and washing machine. The balcony offers extra space and relax on warm summer evenings. The apartment is also piano - who can play and wants to is welcome. A park and the river are within a 5 minute walk. The apartment is located in one the most fashionable district of Kreuzberg and is surrounded by the hottest clubs, bars and restaurants. An offer that has something for almost every taste something attractive. The subway is (Silesian Gate) and the bus station are right outside the front door. So you can leave your car and enjoy the city without parking stress. 

 Answer:  5 minute walk start:  512
n
The apartment has three rooms and a big kitchen, is extremely bright a

y
We offer our 4 room apartment in Prenzlauer Berg. It is situated between Kollwitzplatz and the Volkspark Friedrichshain. Details on the apartment: Situated on 4th floor of typical historical Berlin building - very light and quiet (no elevator) Wooden floors, with open kitchen area, bathroom with bathtub and washing machine Kitchen amenities: dishwasher, tea and coffee facilities, water heater, electric oven and gas stove, refrigerator, dishes and cutlery, pots and pans Sheets and towels provided Wireless DSL flat internet 3 bedrooms with double bed Open kitchen area Sleeping possibilities for up to 5 people Balcony Closest tramway station: Prenzlauer Allee or Greifswalder Str or Danziger Str./Winsstr Special note : we periodically house a cat  - not during rental periods (!)- so if you have allergies please let us know!

 Answer:  4 start:  13
n
LUX BERLIN 2 is on the 4th floor of a beautifully restored art-nouveau building (with elevator access). Its impressive 2-metre high windows 

n
The studio apartment is located in the rear building of a typical "Berliner Altbau". Built in 1910 it still has lofty high ceilings and windows.  The main living area has a comfortable queen size bed (160x200), a stylish couch that can be converted into a double bed (184x196), a dinner table with chairs and a spacious wardrobe. The kitchen is equipped with everything you might need, including an oven and hub, a toaster, coffee machine and kettle. There is usually some tea, coffee, salt etc. in the kichen that previous guests have left. We do not, however, provide these items and do not guarantee that they will be there.  The bathroom is spacious and has a big bathtub with a shower curtain so you can enjoy both, refreshing showers or long bubble baths. There is also a washing machine and a dryer in the bathroom.

 Answer:  160x200 start:  201
n
One of the best locations in Berlin. Cheerful, bright, renovated  by a famous italian interior designer, luxury equipments and original artwor

n
You will have an entire 90sqm apartment (about 960 sqft) at your disposal, including a bedroom, two bathrooms, a living room and a kitchen. The bedroom can sleep up to 3 people (bed + pull-out armchair) and the living room up to 2 (in the sofa-bed). We can also accommodate 2 extra infants at no cost who can sleep in the two cots we provide (so no more than 5 adults in total). Wireless internet is of course included, we have a cable connection which is very fast (100Mbs in up/download). The password is written on the fridge. There are 2 TVs in the apartment (both with complimentary Netflix), one in one bedroom and the other in the living room; on both you can watch numerous international channels or connect your computer to watch movies or videos with the HDMI and mini-display port connectors provided.

 Answer:  960 sqft start:  47
n
The lovingly restored apartment is situated in a historic, 200-year old building in the heart of Berlin within walking distance to all of the major poin

y
On request, in the living area 1 - 2 beds can be placed, if the apartment will be used for a longer time. In the sleeping room there is a 160 cm wide double bed and a large wardro- be. The small kitchen is equipped with dishwasher, 2 hotplates, refrigerator with freezer and microwave. The bathroom is equipped with a bathtub and a washing machine. The apartment distinguishes itself, with many small light sources to create a beautiful atmosphere . Facilities: 2 room apartment, 3 floor (lift), double bed (160 cm wide), sleeping sofa 140 cm wide), TV, radio, CD-player, fast wifi. Bath: with bathtub, washing machine, ironing board and iron, hairdryer. Kitchen: fridge, dishwasher, toaster, coffeemaker, water cooker, microwave / grill.

 Answer:  160 cm start:  138
n
This well-equipped apartment sleeps up to four persons. Furniture from Biedermeier to present-day. DSL/WLAN internet access. 55 square metres, ground floor of a 1912 building, windows facing a quiet courtyard. There's a queen-s

n
Es ist eine renovierte Altbauwohnung in einem repräsentativen typischen Berliner Altbau, ruhig im Erdgeschoss des Seitenflügels zum Hofgarten hin gelegen.  Alle 3 Zimmer sind ausgestattet mit Doppelbetten, Schränken und Nachtischen. Im großen Zimmer finden sich zusätzlich eine Schlafcouch mit Lattenrost und Matratze, ein großer Esstisch und ein Fernseher.  Das Bad des Appartements ist gefliest und mit einer Badewanne versehen. Es fließt wahlweise Kalt-/ Warm- oder Heißwasser auch mit hohem Wasserdruck. Waschmaschine und Trockner sind vorhanden.  Die Küche mit Essplatz, Kühlschrank, Mikrowelle, Geschirrspüler und Elektroherd mit Backofen lädt zur Selbstverpflegung ein.

 Answer:  3 start:  161
n
quiet in the backyard with sunny balcony. sleeping room with double bed, living room connects with open kitchen, spiral staircase leads from living room under the roof with space for two additional people to sleep (seperate or together). Balcony has space for 6 to sit and is sunny from late mo

In [100]:
predictions[0]

{'score': 0.7426713705062866, 'start': 181, 'end': 183, 'answer': '28'}

In [124]:
answers = answer_texts.copy()

In [115]:
predictions[40]

{'answer': 'unknown'}

In [125]:
answers.astype(int)

AttributeError: 'list' object has no attribute 'astype'

In [128]:
answers = [int(i) for i in answers]

In [132]:
data = pd.DataFrame(contexts,columns = ['context'])

In [137]:
data['question'] =question
data['answers.text'] = answers
data['answers.start'] = answer_start

In [138]:
data

Unnamed: 0,context,question,answers.text,answers.start
0,"The room is very large, private, cozy, bright,...",What is the size of the room/apartment in squa...,28,181
1,A+++ location! This „Einliegerwohnung“ is an e...,What is the size of the room/apartment in squa...,30,248
2,1st floor (68m2) apartment on Kollwitzplatz/ P...,What is the size of the room/apartment in squa...,68,11
3,"Your room is really big and has 26 sqm, is ver...",What is the size of the room/apartment in squa...,26,32
4,Stay in a stylish loft on the second floor and...,What is the size of the room/apartment in squa...,63,335
5,"The BrightRoom is an approx. 20 sqm (215ft²), ...",What is the size of the room/apartment in squa...,20,29
6,"THE APPARTMENT - 4 bedroom (US, Germany: 5 roo...",What is the size of the room/apartment in squa...,165,216
7,Hello future guests! We want to rent our cute ...,What is the size of the room/apartment in squa...,55,184
8,"A 100 qm fancy and bright designer loft condo,...",What is the size of the room/apartment in squa...,100,2
9,Relax in the leafy 40m2 garden terrace and enj...,What is the size of the room/apartment in squa...,70,105


In [139]:
data.to_csv('data.csv')