In [None]:
import numpy as np
import pandas as pd

# Load the source workbook; later cells read its 'student' and 'cvr' columns.
source_workbook = 'data/SOFT-3.xlsx'
df = pd.read_excel(source_workbook)

In [None]:
#Install necessary dependencies
#!python -m pip install -U faker

In [None]:
from faker import Faker

# Generator used to replace the real student names with fake ones.
faker = Faker()

# Test fake data generation. The sample is printed explicitly: a bare
# expression in the middle of a cell is evaluated but never displayed.
print("The Faker library can generate fake names. By running 'faker.name()', we get:")
print(faker.name())

# Seed the generator so the name mapping below is the same on every run.
Faker.seed(4321)

# One fake name per distinct student, so a student who appears in several
# rows is replaced by the same fake name everywhere.
dict_names = {name: faker.name() for name in df['student'].unique()}
df['student'] = df['student'].map(dict_names)
df.head(20)


In [None]:
#Function that queries the CVR API using a company's CVR number.

import urllib.request as request
import json 
import contextlib

def cvrapi(cvr, timeout=10):
  """Look up a Danish company through the cvrapi.dk REST API.

  Args:
    cvr: CVR (VAT) number of the company. It is URL-encoded into the
      ``vat`` query parameter, so stray spaces or reserved characters
      cannot corrupt the request URL.
    timeout: Seconds to wait on the connection before giving up, so a
      stalled server cannot hang the notebook indefinitely.

  Returns:
    The JSON response body decoded into Python objects.

  Raises:
    urllib.error.URLError: If the request fails (``HTTPError`` for an
      error status returned by the server).
    ValueError: If the response body is not valid JSON.
  """
  from urllib.parse import urlencode

  query = urlencode({'country': 'dk', 'vat': cvr})
  request_a = request.Request(
    url='https://cvrapi.dk/api?' + query,
    headers={
      'User-Agent': 'ds-assignment'})
  # The response object is its own context manager and is closed on exit.
  with request.urlopen(request_a, timeout=timeout) as response:
    return json.loads(response.read())

In [None]:
#Put every company's CVR number in a list, skipping rows where it is missing
#(a missing value would otherwise be sent to the API as a CVR number)
companies_cvr = df['cvr'].dropna().tolist()

#Remove duplicates; dict.fromkeys keeps first-appearance order, so the list
#(and everything derived from it) comes out in the same order on every run
unique_cvr = list(dict.fromkeys(companies_cvr))

print(unique_cvr)

In [None]:
#Call API for each CVR and write response json to a json file

import json

# Query the CVR API once per distinct CVR number, keeping the decoded
# responses in the same order as 'unique_cvr'.
companies_list = [cvrapi(cvr_number) for cvr_number in unique_cvr]

# Save all responses together as one JSON array.
with open("data/companies.json", "w") as companies_file:
    json.dump(companies_list, companies_file)

In [None]:
#Load the saved API responses from the JSON file into a pandas DataFrame
companies_json_path = 'data/companies.json'
json_df = pd.read_json(companies_json_path)

json_df.head(11)

In [None]:
#Install geopy
#!python -m pip install -U geopy

In [None]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="geoapiExercises")
# Space the lookups at least one second apart instead of sending them
# back-to-back to the public geocoding service.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

company_cities = json_df['city'].tolist()
company_addresses = json_df['address'].tolist()

# Keep only the part of each address before the first comma.
addresses_stripped = [address.split(',')[0] for address in company_addresses]

# Build one "<street>, <city>" query string per company.
addresses = [
    street + ", " + city
    for street, city in zip(addresses_stripped, company_cities)
]

latitudes = []
longitudes = []
for address in addresses:
    location = geocode(address)
    if location is None:
        # No match for this address: store NaN so both lists stay aligned
        # row-for-row with 'json_df' instead of crashing on '.latitude'.
        latitudes.append(float("nan"))
        longitudes.append(float("nan"))
        continue
    latitudes.append(location.latitude)
    longitudes.append(location.longitude)

print(latitudes)
print(longitudes)

In [None]:
#Attach the geocoded coordinates to the DataFrame 'json_df' as two new
#columns: 'latitude' first, then 'longitude'
json_df = json_df.assign(latitude=latitudes, longitude=longitudes)

json_df.head(10)

In [None]:
#Clean dataframe: keep only the listed columns, in this order

kept_columns = ['vat', 'name', 'city', 'address', 'zipcode', 'latitude', 'longitude']
clean_df = json_df.loc[:, kept_columns]

clean_df.head(20)

In [None]:
#Install folium
#!python -m pip install -U folium

In [None]:
import folium

# Create a base map centered on Denmark
denmark_map = folium.Map(location=[55.67, 12.57], zoom_start=7)

# folium refuses NaN coordinates, so only rows that have both a latitude
# and a longitude get a marker.
located_df = clean_df.dropna(subset=["latitude", "longitude"])

# Add one marker per company, with the company name as the popup text
marker_rows = zip(located_df["latitude"], located_df["longitude"], located_df["name"])
for latitude, longitude, company_name in marker_rows:
    folium.Marker([latitude, longitude], popup=company_name).add_to(denmark_map)

# Show the map
denmark_map

In [None]:
#Write the cleaned dataframe to a CSV file, leaving out the index column
cleaned_csv_path = "data/cleaned.csv"
clean_df.to_csv(cleaned_csv_path, index=False)

In [None]:
import smtplib
import ssl
from email.message import EmailMessage

import getpass
import os

email_sender = 'cphfa116@gmail.com'
# The password must not be stored in the notebook: read it from the
# EMAIL_PASSWORD environment variable, or prompt for it when that is unset.
email_password = os.environ.get('EMAIL_PASSWORD') or getpass.getpass(
    'Password for %s: ' % email_sender)
email_receiver = 'tdi@cphbusiness.dk'

subject = 'Data Science Assignment - Automated Email Frederik'
body = """
This is an automated email sent from my python application in the data science course
"""

# Build the message with its headers and plain-text body.
em = EmailMessage()
em['From'] = email_sender
em['To'] = email_receiver
em['Subject'] = subject
em.set_content(body)

# Default SSL context for the encrypted connection.
context = ssl.create_default_context()

with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
    smtp.login(email_sender, email_password)
    # send_message takes the sender and recipient from the message headers.
    smtp.send_message(em)