# Python Datetime Exercises
1. Get the current day, month, year, hour, minute and timestamp from datetime module

In [None]:
from datetime import datetime

# Take a single snapshot of the clock so every field below describes the same instant.
now = datetime.now()

# Split the snapshot into its calendar and clock components.
current_day, current_month, current_year = now.day, now.month, now.year
current_hour, current_minute = now.hour, now.minute
current_timestamp = now.timestamp()

# Report each component on its own labelled line.
for report_line in (
    f"Day: {current_day}",
    f"Month: {current_month}",
    f"Year: {current_year}",
    f"Hour: {current_hour}",
    f"Minute: {current_minute}",
    f"Timestamp: {current_timestamp}",
):
    print(report_line)

2. Format the current date using this format: "%m/%d/%Y, %H:%M:%S"


In [None]:
from datetime import datetime

# Layout requested by the exercise: month/day/year, then a 24-hour clock.
date_format = "%m/%d/%Y, %H:%M:%S"

now = datetime.now()
formatted_date = now.strftime(date_format)
print(formatted_date)

3. Today is 5 December, 2019. Change this time string to time.

In [None]:
# The date to parse, written as "<day> <full month name>, <year>".
time_string = "5 December, 2019"

# %d = day of the month, %B = full month name, %Y = four-digit year.
parse_format = "%d %B, %Y"
time_object = datetime.strptime(time_string, parse_format)
print(time_object)

4. Calculate the time difference between now and new year.

In [None]:
# Measure from the actual current moment instead of a hard-coded date, so the
# result stays correct whenever this cell is run.
today = datetime.now()
# The next New Year is midnight on 1 January of the following year.
new_year = datetime(year=today.year + 1, month=1, day=1)

# Subtracting two datetimes yields a timedelta.
time_left_for_newyear = new_year - today

print('Time left for new year: ', time_left_for_newyear)

5. Calculate the time difference between 1 January 1970 and now.

In [None]:
from datetime import datetime

# Reference point: midnight on 1 January 1970 (a naive datetime, like datetime.now()).
nyear = datetime(1970, 1, 1)
today = datetime.now()

# Elapsed time between the reference point and the current moment.
time_difference = today - nyear

print(f'The Time Difference between "{nyear}" and "{today}" is : ', time_difference)

6. Think, what can you use the datetime module for? Examples:
* Time series analysis
* To get a timestamp of any activities in an application
* Adding posts on a blog

# Answer
* Scheduling tasks
* Logging events
* Measuring execution time of code
* Handling time zones
* Creating countdown timers
* Validating date and time input
* Calculating age from birthdate

# File Handling Exercises:
#### Exercises: Level 1
1. Write a function which counts the number of lines and the number of words in a text. All the files are in the data folder:

a) Read obama_speech.txt file and count number of lines and words 
   
b) Read michelle_obama_speech.txt file and count number of lines and words 

c) Read donald_speech.txt file and count number of lines and words 

d) Read melina_trump_speech.txt file and count number of lines and words

In [None]:
def count_lines_and_words(file_path, encoding='utf-8'):
    """Count the lines and whitespace-separated words in a text file.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A ``(num_lines, num_words)`` tuple of ints.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid in ``encoding``.
    """
    num_lines = 0
    num_words = 0
    # Stream the file line by line instead of holding every line in a list.
    with open(file_path, 'r', encoding=encoding) as file:
        for line in file:
            num_lines += 1
            num_words += len(line.split())
    return num_lines, num_words

# Speech transcripts to analyse (absolute Windows paths)
files = [
    'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\obama_speech.txt',
    'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\michelle_obama_speech.txt',
    'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\donald_speech.txt',
    'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\melina_trump_speech.txt',
]

# Print the line and word totals of every transcript, in list order.
# map() is lazy, so each file is still counted right before its line is printed.
for file, (lines, words) in zip(files, map(count_lines_and_words, files)):
    print(f'{file}: {lines} lines, {words} words')

2. Read the countries_data.json data file in data directory, create a function that finds the ten most spoken languages

#### Your output should look like this
print(most_spoken_languages(filename='./data/countries_data.json', top_n=10))
[(91, 'English'),
(45, 'French'),
(25, 'Arabic'),
(24, 'Spanish'),
(9, 'Russian'),
(9, 'Portuguese'),
(8, 'Dutch'),
(7, 'German'),
(5, 'Chinese'),
(4, 'Swahili'),
(4, 'Serbian')]

#### Your output should look like this
print(most_spoken_languages(filename='./data/countries_data.json', top_n=3))
[(91, 'English'),
(45, 'French'),
(25, 'Arabic')]

In [None]:
import json
from collections import Counter

def most_spoken_languages(filename, top_n=10):
    """Return the ``top_n`` languages that are listed by the most countries.

    Args:
        filename: Path to a JSON file holding a sequence of country records.
            Each record must provide a ``'languages'`` entry (expected to be
            a list of language names).
        top_n: Number of languages to return; defaults to ten, as asked for
            by the exercise.

    Returns:
        A list of ``(count, language)`` tuples ordered from the most to the
        least widespread language, e.g. ``[(91, 'English'), (45, 'French')]``.
        This is the output format the exercise specifies.
    """
    # JSON is UTF-8 by specification; being explicit makes non-ASCII names
    # decode the same way regardless of the platform's locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        countries_data = json.load(file)

    languages_counter = Counter()
    for country in countries_data:
        languages_counter.update(country['languages'])

    # most_common() yields (language, count); the exercise wants the count first.
    return [
        (count, language)
        for language, count in languages_counter.most_common(top_n)
    ]

# A positional argument may not follow a keyword argument (SyntaxError), so the
# count is passed by keyword as well.
print(most_spoken_languages(filename='./data/countries_data.json', top_n=10))
print(most_spoken_languages(filename='./data/countries_data.json', top_n=3))

3. Read the countries_data.json data file in data directory, create a function that creates a list of the ten most populated countries

In [None]:
import json

def most_populated_countries(filename, top_n=10):
    """Return the ``top_n`` most populated countries from a JSON data file.

    Args:
        filename: Path to a JSON file holding a sequence of country records
            that each provide ``'name'`` and ``'population'`` entries.
        top_n: How many countries to return; defaults to ten, as asked for by
            the exercise. A value of zero or less yields an empty list.

    Returns:
        A list of ``{'country': name, 'population': population}`` dicts,
        ordered from the largest population to the smallest.
    """
    # JSON is UTF-8 by specification; being explicit makes non-ASCII country
    # names decode the same way regardless of the platform's locale encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        countries_data = json.load(file)

    sorted_countries = sorted(
        countries_data,
        key=lambda country: country['population'],
        reverse=True,
    )
    # Clamp at zero: a negative slice bound would drop countries from the end
    # of the ranking instead of returning none.
    return [
        {'country': country['name'], 'population': country['population']}
        for country in sorted_countries[:max(top_n, 0)]
    ]

# A positional argument may not follow a keyword argument (SyntaxError), so the
# count is passed by keyword as well.
print(most_populated_countries(filename='C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\countries_data.json', top_n=10))
print(most_populated_countries(filename='C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\countries_data.json', top_n=3))

# Exercises: Level 2
4. Extract all incoming email addresses as a list from the email_exchange_big.txt file.

In [None]:
import re

def extract_email_addresses(file_path):
    """Return every email-like token found in a text file.

    The whole file is read and scanned once. Matches are returned in order of
    appearance and duplicates are kept.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A list of the matched address strings.
    """
    # local-part @ domain . top-level domain of at least two letters
    matcher = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    with open(file_path, 'r') as source:
        return matcher.findall(source.read())

# Collect the addresses from the email exchange file and display them.
email_file = 'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\email_exchange_big.txt'
email_addresses = extract_email_addresses(email_file)
print(email_addresses)

5. Find the most common words in the English language. Name your function find_most_common_words; it takes two parameters - a string or a file, and a positive integer indicating the number of words. Your function returns an array of tuples in descending order. Check the output.

In [None]:
import re
from collections import Counter

def find_most_common_words(text_or_file, num_words, encoding='utf-8'):
    """Return the ``num_words`` most frequent words of a text or a text file.

    Args:
        text_or_file: Either the path of an existing file, whose contents are
            analysed, or the text to analyse itself.
        num_words: How many words to return.
        encoding: Encoding used to decode the file when ``text_or_file``
            names one. Defaults to UTF-8 so the result does not depend on the
            platform's locale encoding.

    Returns:
        A list of ``(word, count)`` tuples in descending order of count.
        Words are lower-cased before they are counted.

    Raises:
        ValueError: If ``text_or_file`` is not a string.
    """
    import os  # local import keeps this cell self-contained

    if not isinstance(text_or_file, str):
        raise ValueError("The first parameter should be a string representing text or a file path.")

    # Decide up front whether the string names a file. Probing with open()
    # fails on ordinary text with errors other than FileNotFoundError (for
    # example "File name too long" for a long passage), whereas
    # os.path.isfile() simply answers False in those cases.
    if os.path.isfile(text_or_file):
        with open(text_or_file, 'r', encoding=encoding) as file:
            text = file.read()
    else:
        text = text_or_file

    words = re.findall(r'\b\w+\b', text.lower())
    return Counter(words).most_common(num_words)

# Show the three and then the ten most common words of Obama's speech.
file_path = 'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\obama_speech.txt'

print(find_most_common_words(file_path, 3))
print(find_most_common_words(file_path, 10))

6. Use the function find_most_common_words to find:
* a) The ten most frequent words used in Obama's speech 
* b) The ten most frequent words used in Michelle's speech 
* c) The ten most frequent words used in Trump's speech 
* d) The ten most frequent words used in Melina's speech


In [None]:
# Each section feeds one speech transcript to find_most_common_words and
# prints its ten most frequent words.

# a) The ten most frequent words used in Obama's speech
obama_speech_common_words = find_most_common_words('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\obama_speech.txt', 10)
print("Obama's speech:", obama_speech_common_words)

# b) The ten most frequent words used in Michelle's speech
michelle_speech_common_words = find_most_common_words('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\michelle_obama_speech.txt', 10)
print("Michelle's speech:", michelle_speech_common_words)

# c) The ten most frequent words used in Trump's speech
# The separator before "donald_speech.txt" is escaped like all the others;
# a lone "\d" is an invalid escape sequence in a normal string literal.
trump_speech_common_words = find_most_common_words('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\donald_speech.txt', 10)
print("Trump's speech:", trump_speech_common_words)

# d) The ten most frequent words used in Melina's speech
melina_speech_common_words = find_most_common_words('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\melina_trump_speech.txt', 10)
print("Melina's speech:", melina_speech_common_words)

7. Write a Python application that checks the similarity between two texts. It takes a file or a string as a parameter and evaluates the similarity of the two texts. For instance, check the similarity between the transcripts of Michelle's and Melina's speeches. You may need a couple of functions: a function to clean the text (clean_text), a function to remove support words (remove_support_words) and finally one to check the similarity (check_text_similarity). The list of stop words is in the data directory.


In [None]:
import re
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def clean_text(text):
    """Strip punctuation from ``text`` and lower-case what remains.

    Every character that is neither a word character (letter, digit or
    underscore) nor whitespace is removed; whitespace is left untouched.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower()

def remove_support_words(text, stop_words):
    """Drop every word of ``text`` that appears in ``stop_words``.

    The text is split on whitespace, the comparison is exact (case-sensitive),
    and the surviving words are joined back together with single spaces.
    """
    return ' '.join(
        token for token in text.split() if token not in stop_words
    )

def check_text_similarity(text1, text2):
    """Return the cosine similarity of the TF-IDF vectors of two texts.

    Both texts are vectorised together with one ``TfidfVectorizer``, and the
    pairwise cosine similarities of the two resulting rows are computed.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    pairwise_scores = cosine_similarity(tfidf_matrix.toarray())
    # Row 0, column 1 holds the score of text1 against text2.
    return pairwise_scores[0, 1]


# Read the speeches
with open('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\michelle_obama_speech.txt', 'r') as file:
    michelle_speech = file.read()

with open('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\melina_trump_speech.txt', 'r') as file:
    melina_speech = file.read()

# `stop_words` is used below but was never defined, which raised NameError.
# The exercise says its stop-word list is in the data directory: load it into
# `stop_words` beforehand to use that list. If nothing has defined it yet,
# fall back to scikit-learn's built-in English list so the cell still runs.
try:
    stop_words
except NameError:
    from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as stop_words

# Clean and remove stop words from the speeches
michelle_speech_cleaned = remove_support_words(clean_text(michelle_speech), stop_words)
melina_speech_cleaned = remove_support_words(clean_text(melina_speech), stop_words)

# Check similarity
similarity = check_text_similarity(michelle_speech_cleaned, melina_speech_cleaned)
print(f"Similarity between Michelle's and Melina's speeches: {similarity}")

8. Find the 10 most repeated words in the romeo_and_juliet.txt

In [None]:
# Ten most repeated words in romeo_and_juliet.txt
romeo_and_juliet_path = 'C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\romeo_and_juliet.txt'
romeo_and_juliet_common_words = find_most_common_words(romeo_and_juliet_path, 10)
print("Romeo and Juliet:", romeo_and_juliet_common_words)

9. Read the hacker news csv file and find out: 
* a) Count the number of lines containing python or Python 
* b) Count the number of lines containing JavaScript, javascript or Javascript 
* c) Count the number of lines containing Java and not JavaScript

In [None]:
import pandas as pd

# Read the hacker news CSV file
hacker_news_df = pd.read_csv('C:\\Users\\User\\Desktop\\ArewaDS_Python_Exercises\\data\\hacker_news.csv')

# Every count below is taken over the 'title' column. na=False makes rows with
# a missing title count as "no match" instead of propagating NaN.
titles = hacker_news_df['title']

# a) Count the number of lines containing python or Python
python_count = titles.str.contains('python', case=False, na=False).sum()
print(f"Number of lines containing 'python' or 'Python': {python_count}")

# b) Count the number lines containing JavaScript, javascript or Javascript
has_javascript = titles.str.contains('javascript', case=False, na=False)
javascript_count = has_javascript.sum()
print(f"Number of lines containing 'JavaScript', 'javascript' or 'Javascript': {javascript_count}")

# c) Count the number lines containing Java and not JavaScript
# The word-boundary pattern matches "Java" only as a whole word, so it never
# matches inside "JavaScript". Subtracting javascript_count from it therefore
# gave a wrong (possibly negative) number. Combine the per-row masks instead:
# rows that mention Java as a word and do not mention JavaScript at all.
has_java = titles.str.contains(r'\bJava\b', case=False, na=False)
java_count = has_java.sum()
java_not_javascript_count = (has_java & ~has_javascript).sum()
print(f"Number of lines containing 'Java' and not 'JavaScript': {java_not_javascript_count}")