In [31]:
import pandas as pd
import smtplib
import requests
from time import sleep
import random
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from smtplib import SMTP
import sys


df = pd.read_csv("Apartments.csv", encoding="utf-16", delimiter = ";", error_bad_lines = False)
df.head()



### DEALING WITH MISSING DATA

#df[df.isnull().any(axis= 1)]

df.dropna(inplace = True)
df.reset_index(inplace = True, drop = True)



### CHECK FOR DUPLICATES

#df.duplicated().any()



### DATA ADJUSTMENTS

# removing spaces
cols = df.columns

for col in df.columns:
    df[f"{col}"] = df[f"{col}"].str.strip()
    
    
# seperate action price
akcija = df[df["price"] == "Stanje"]


# regular price
df = df[(df["price"] != "Po dogovoru") & (df["price"] != "Stanje")]
df.reset_index(inplace = True, drop = True)


# seperate data for price per agreement
po_dogovoru = df[df["price"] == "Po dogovoru"]
po_dogovoru.reset_index(inplace = True, drop = True)


# getting action prices 
prices = []

for apartment in akcija["link"]:
    response = requests.get(apartment, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.text, "html.parser")
    prices.append(soup.find("p", style = "font-size:25px;font-weight:500;background-color:#9dab50;color:#fff;").text[11:-3].strip())
    
    sleep(random.randint(2, 5))

akcija["price"] = prices


# concat regular and action
df = pd.concat([df, akcija])
df.reset_index(inplace = True, drop = True)     

    
    
### PRICE

# remove currency
df["price"] = df["price"].str[:-2].str.strip()

# remove "."
df["price"] = df["price"].str.replace(".", "")

# change price to numeric
df["price"] = pd.to_numeric(df["price"])



# N OF SQM

# taking just numbers
df["n of sqm"] = df["n of sqm"].str[:2]


# removing non num entries
numbers = [str(i) for i in range(100)]

df = df[df["n of sqm"].isin(numbers)]
df.reset_index(inplace = True, drop = True)


# change to numeric
df["n of sqm"] = pd.to_numeric(df["n of sqm"])


# create price of sqm
df["price_sqm"] = (df["price"]/df["n of sqm"]).astype(int)



# N OF ROOMS

# recode
df["n of rooms"].replace(
    
    {"Trosoban (3)": "3",
     "Dvosoban (2)": "2",
     "Četverosoban (4)": "4",
     "Jednosoban (1)": "1",
     "Petosoban i više": "5",
     "Garsonjera" : "0",
     "Jednoiposoban (1.5)": "1.5",}, inplace = True)

# convert to num
df["n of rooms"] = pd.to_numeric(df["n of rooms"], downcast = "float")



# PUBLISH DATE

# take just date
df["publish date"] = df["publish date"].str[:-8]

# convert to date
df["publish date"] = pd.to_datetime(df["publish date"])



# setting up conditions and filtering data based on them
'''
Conditions:
- price <= 150000
- n of rooms >= 2
- n of sqm >= 30
- location Novo Sarajevo and Centar 
- floor != prizemlje & suteren
''' 
df = df[(df["price"] <= 150000) & 
    (df["n of rooms"] >= 2) & 
    (df["n of sqm"] >= 30) & 
    ((df["location"] == "Sarajevo - Centar") | (df["location"] == "Novo Sarajevo")) & 
    (df["floor"] != "Prizemlje") & 
    (df["floor"] != "Suteren")]

df = df.sort_values(by = "publish date", ascending = False)
df.reset_index(inplace = True, drop = True)    



  df = pd.read_csv("Apartments.csv", encoding="utf-16", delimiter = ";", error_bad_lines = False)
b'Skipping line 993: expected 10 fields, saw 11\n'
  df["price"] = df["price"].str.replace(".", "")


In [32]:
df

Unnamed: 0,header,location,price,publish date,n of sqm,floor,n of rooms,furnished?,heating system,link,price_sqm
0,NOVOGRADNJA CENTAR! Dvosoban stan cca 44 m2! U...,Sarajevo - Centar,131850,2022-12-04,44,1,2.0,Nenamješten,Plin,https://www.olx.ba/artikal/47674177/novogradnj...,2996
1,NN Nekretnine: Sprat kuće - Mejtaš - Čekaluša,Sarajevo - Centar,118000,2022-12-01,62,Visoko prizemlje,3.0,Nenamješten,Ostalo,https://www.olx.ba/artikal/46303962/nn-nekretn...,1903
2,Dvosoban stan sa garažom / 3. sprat / Vraca,Novo Sarajevo,139500,2022-11-03,46,3,2.0,Namješten,Centralno (Plin),https://www.olx.ba/artikal/47203433/dvosoban-s...,3032
3,"Stan sa garazom-Dobojska, Sarajevo",Novo Sarajevo,139500,2022-10-03,47,3,2.0,Namješten,Plin,https://www.olx.ba/artikal/47199444/stan-sa-ga...,2968
4,Dvosoban stan novije gradnje od 45m2 u Velešićima,Novo Sarajevo,125000,2022-09-05,45,1,2.0,Namješten,Centralno (Plin),https://www.olx.ba/artikal/48042158/dvosoban-s...,2777
5,NN Nekretnine: Dvosoban adaptiran stan - Bjelave,Sarajevo - Centar,148000,2022-09-05,42,3,2.0,Namješten,Plin,https://www.olx.ba/artikal/48040802/nn-nekretn...,3523
6,MY SPACE/ Stan/ Velesici/ Muhameda Ef Pandze/ ...,Novo Sarajevo,145000,2022-06-21,61,Visoko prizemlje,3.0,Polunamješten,Centralno (Plin),https://www.olx.ba/artikal/48576223/my-space-s...,2377
7,"Dvosoban stan Sarajevo, Centar, Višnjik",Sarajevo - Centar,149000,2022-06-21,45,1,2.0,Namješten,Struja,https://www.olx.ba/artikal/48574330/dvosoban-s...,3311
8,Dvosoban stan - Centar - Koševsko brdo - 36 m2,Sarajevo - Centar,99500,2022-06-20,36,4,2.0,Nenamješten,Struja,https://www.olx.ba/artikal/48565044/dvosoban-s...,2763
9,Dvosoban renoviran stan Sarajevo Centar Bjelav...,Sarajevo - Centar,149000,2022-06-18,45,1,2.0,Namješten,Struja,https://www.olx.ba/artikal/48543663/dvosoban-r...,3311


In [None]:
# email setup

from email.message import EmailMessage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

sender_email = input(str("Enter your email: "))
rec_email = input(str("Enter email to send to: "))
password = input(str("Enter pass: "))

msg = MIMEMultipart('alternative')
msg["Subject"] = "Your update on apartments"


html = """\
<html>
  <head></head>
  <body>
    {0}
  </body>
</html>
""".format(df.to_html())


part1 = MIMEText(html, 'html')


msg.attach(part1)


with smtplib.SMTP("smtp-mail.outlook.com", 587) as smtp:
    smtp.ehlo()
    smtp.starttls()
    smtp.ehlo()
    
    smtp.login(email, password)
    print("Login successful")
    
    
    smtp.sendmail(sender_email, rec_email, msg.as_string())
    smtp.quit()
    print("Email sent")