# Session 4 Exercises

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns

## 1
- Read in the data in the covid.csv file
- Create a date column by parsing the dateRep column into a datetime
- Set the date column as the index
- Use the replace() method to update all occurrences of United_States_of_America and United Kingdom to USA and UK, respectively
- Sort the index

In [4]:
covid = pd.read_csv('data/covid.csv')\
    .assign(date=lambda x: pd.to_datetime(x.dateRep, format='%d/%m/%Y'))\
    .set_index('date')\
    .replace('United_States_of_America', 'USA')\
    .replace('United_Kingdom', 'UK')\
    .sort_index()

## 2
For the 5 countries with the most cases (cumulative), find the day with the largest number of cases.

In [5]:
top_five_countries = (covid
    .groupby('countriesAndTerritories')
    .cases
    .sum()
    .nlargest(5)
    .index)

covid[covid.countriesAndTerritories.isin(top_five_countries)]\
    .groupby('countriesAndTerritories').cases.idxmax()

countriesAndTerritories
Brazil   2020-07-30
India    2020-09-17
Peru     2020-08-17
Russia   2020-07-18
USA      2020-07-25
Name: cases, dtype: datetime64[ns]

## 3
Find the 7-day average change in COVID-19 cases for the last week in the data for the countries found in part 2.

In [8]:
(covid
     .groupby(['countriesAndTerritories',
              pd.Grouper(freq='1D')])
     .cases.sum()
     .unstack(0)
     .diff()
     .rolling(7)
     .mean()
     .last('1W')[top_five_countries])

countriesAndTerritories,USA,India,Brazil,Russia,Peru
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-14,473.714286,181.285714,35.285714,36.285714,73.142857
2020-09-15,1513.0,1142.857143,697.428571,46.285714,377.571429
2020-09-16,3478.714286,59.571429,3196.285714,61.428571,-65.0
2020-09-17,-1047.0,308.428571,143.428571,810.0,-29.428571
2020-09-18,865.714286,-18.142857,-607.714286,-688.428571,-227.571429
2020-09-19,306.857143,-604.714286,-560.142857,57.285714,-41.285714


## 4
Find the first date that each country other than China had cases:

In [7]:
(covid
    .pivot(columns='countriesAndTerritories',
           values='cases')
    .drop(columns='China')
    .apply(lambda x: x[x > 0].index.min())
    .sort_values()
    .rename(lambda x: x.replace('_', ' ')))

countriesAndTerritories
Thailand         2020-01-13
Japan            2020-01-15
South Korea      2020-01-20
Taiwan           2020-01-21
USA              2020-01-21
                    ...    
Yemen            2020-04-10
Western Sahara   2020-04-26
Tajikistan       2020-05-01
Comoros          2020-05-02
Lesotho          2020-05-15
Length: 209, dtype: datetime64[ns]

## 5
Rank the countries by total cases using percentiles.

In [9]:
(covid
    .pivot_table(columns='countriesAndTerritories',
                 values='cases',
                 aggfunc='sum')
    .T
    .transform('rank',
               method='max',
               pct=True)
    .sort_values('cases',
                 ascending=False)
    .rename(lambda x: x.replace('_', ' ')))

Unnamed: 0_level_0,cases
countriesAndTerritories,Unnamed: 1_level_1
USA,1.000000
India,0.995238
Brazil,0.990476
Russia,0.985714
Peru,0.980952
...,...
Greenland,0.023810
Montserrat,0.019048
Falkland Islands (Malvinas),0.019048
Holy See,0.009524


## 6
Write a Python program that matches a word containing 'z', not at the start or end of the word.

In [10]:
import re
def text_match(text):
        patterns = '\Bz\B'
        if re.search(patterns,  text):
                return 'Found a match!'
        else:
                return('Not matched!')

print(text_match("The quick brown fox jumps over the lazy dog."))
print(text_match("Python Exercises."))

Found a match!
Not matched!


## 7
Write a Python program to remove leading zeros from an IP address (e.g.: 216.08.094.196 should become 216.8.94.196).

In [11]:
import re
ip = "216.08.094.196"
string = re.sub('\.[0]*', '.', ip)
print(string)

216.8.94.196


## 8
Write a Python program to convert a date of yyyy-mm-dd format to dd-mm-yyyy format.

2026-01-02 should become 02-01-2026

In [13]:
import re
def change_date_format(dt):
        return re.sub(r'(\d{4})-(\d{1,2})-(\d{1,2})', '\\3-\\2-\\1', dt)
dt1 = "2026-01-02"
print("Original date in YYY-MM-DD Format: ",dt1)
print("New date in DD-MM-YYYY Format: ",change_date_format(dt1))

Original date in YYY-MM-DD Format:  2026-01-02
New date in DD-MM-YYYY Format:  02-01-2026


## 9
Write a Python program to extract year, month and date from an url.

`url = "https://www.washingtonpost.com/news/football-insider/wp/2016/09/02/odell-beckhams-fame-rests-on-one-stupid-little-ball-josh-norman-tells-author/"`

output: [('2016', '09', '02')]

In [14]:
import re
def extract_date(url):
        return re.findall(r'/(\d{4})/(\d{1,2})/(\d{1,2})/', url)
url1= "https://www.washingtonpost.com/news/football-insider/wp/2016/09/02/odell-beckhams-fame-rests-on-one-stupid-little-ball-josh-norman-tells-author/"
print(extract_date(url1))

[('2016', '09', '02')]


## 10
Write a Python program to separate and print the numbers of a given string.

`String = "Ten 10, Twenty 20, Thirty 30"`

In [15]:
import re
# Sample string.
text = "Ten 10, Twenty 20, Thirty 30"
result = re.split("\D+", text)
# Print results.
for element in result:
    print(element)


10
20
30


## 11
Write a Python program to replace maximum 2 occurrences of space, comma, or dot with a colon.

In [16]:
import re
text = 'Python Exercises, PHP exercises.'
print(re.sub("[ ,.]", ":", text, 2))

Python:Exercises: PHP exercises.


## 12
Write a Python program to extract values between quotation marks of a string.

In [17]:
import re
text1 = '"Python", "PHP", "Java"'
print(re.findall(r'"(.*?)"', text1))

['Python', 'PHP', 'Java']


## 13
Write a Python program to remove multiple spaces in a string.

In [18]:
import re
text1 = 'Python      Exercises'
print("Original string:",text1)
print("Without extra spaces:",re.sub(' +',' ',text1))

Original string: Python      Exercises
Without extra spaces: Python Exercises


## 14
Write a Python program to find all adverbs (ending on ly) and their positions in a given sentence.

In [19]:
import re
text = "Clearly, he has no excuse for such behavior."
for m in re.finditer(r"\w+ly", text):
    print('%d-%d: %s' % (m.start(), m.end(), m.group(0)))

0-7: Clearly


## 15
Write a Python program to concatenate the consecutive numbers in a given string.

Sample text:
Enter at 1 20 Kearny Street. The security desk can direct you to floor 1 6. Please have your identification ready.

Sample solution:
Enter at 120 Kearny Street. The security desk can direct you to floor 16. Please have your identification ready.

In [20]:
import re
txt = "Enter at 1 20 Kearny Street. The security desk can direct you to floor 1 6. Please have your identification ready."
print("Original string:")
print(txt)
new_txt = re.sub(r"(?<=\d)\s(?=\d)", '', txt)
print('\nAfter concatenating the consecutive numbers in the said string:')
print(new_txt)

Original string:
Enter at 1 20 Kearny Street. The security desk can direct you to floor 1 6. Please have your identification ready.

After concatenating the consecutive numbers in the said string:
Enter at 120 Kearny Street. The security desk can direct you to floor 16. Please have your identification ready.


## 16
Write a Python program that checks whether a word stars and ends with a vowel in a given string. Return true if a word matches the condition; otherwise, return false.

Sample Data:
- ("Red Orange White") -> True
- ("Red White Black") -> False
- ("abcd dkise eosksu") -> True

In [21]:
import re
def test(text):
	return bool(re.findall('[/^[aeiou]$|^([aeiou]).*\1$/', text))

text ="Red Orange White"
print("Original string:", text)
print("Check beginning and end of a word in the said string with a vowel:")
print(test(text))
text ="Red White Black"
print("\nOriginal string:", text)
print("Check beginning and end of a word in the said string with a vowel:")
print(test(text))
text ="abcd dkise eosksu"
print("\nOriginal string:", text)
print("Check beginning and end of a word in the said string with a vowel:")
print(test(text))

Original string: Red Orange White
Check beginning and end of a word in the said string with a vowel:
True

Original string: Red White Black
Check beginning and end of a word in the said string with a vowel:
False

Original string: abcd dkise eosksu
Check beginning and end of a word in the said string with a vowel:
True


## 17
Write a Python program to separate and print the numbers and their position of a given string.


In [22]:
text = "The following example creates an ArrayList with a capacity of 50 elements. Four elements are then added to the ArrayList and the ArrayList is trimmed accordingly."

for m in re.finditer("\d+", text):
    print(m.group(0))
    print("Index position:", m.start())

50
Index position: 62


## 18
The given input strings contains some text followed by - followed by a number. Replace that number with its log value using math.log()

- `s1 = 'first-3.14'`
- `s2 = 'next-123'`

In [23]:
s1 = 'first-3.14'
s2 = 'next-123'

pat = re.compile(r'-(.*)')

import math
pat.sub(lambda m: '-' + str(math.log(float(m[1]))), s1)

pat.sub(lambda m: '-' + str(math.log(float(m[1]))), s2)

'next-4.812184355372417'

## 19
Extract all words between ( and ) from the given input string as a list. Assume that the input will not contain any broken parentheses.

Text:
`ip = 'another (way) to reuse (portion) matched (by) capture groups'`

In [25]:
ip = 'another (way) to reuse (portion) matched (by) capture groups'
re.findall(r'\((.*?)\)', ip)

['way', 'portion', 'by']

## 20
Add [] around words starting with s and containing e and t in any order.

text:
`ip = 'sequoia subtle exhibit asset sets2 tests si_te'`

In [26]:
ip = 'sequoia subtle exhibit asset sets2 tests si_te'

re.sub(r'\bs\w*(t\w*e|e\w*t)\w*', r'[\g<0>]', ip)

'sequoia [subtle] exhibit asset [sets2] tests [si_te]'