### Datetime module

In [7]:
from datetime import date

today = date.today()

print("Today's date:", today)
print("Formatted date:", today.strftime("%Y-%m-%d"))

Today's date: 2024-10-17
Formatted date: 2024-10-17


In [6]:
today.strftime("%y-%m-%d")

'24-10-17'

In [8]:
from datetime import time

current_time = time(14, 30, 45)
print("Current time:", current_time)
print("Formatted time:", current_time.strftime("%H:%M:%S"))

Current time: 14:30:45
Formatted time: 14:30:45


In [9]:
from datetime import datetime

now = datetime.now()
print("Current date and time:", now)
print("Formatted datetime:", now.strftime("%Y-%m-%d %H:%M:%S"))

Current date and time: 2024-10-17 09:49:11.672381
Formatted datetime: 2024-10-17 09:49:11


In [14]:
from datetime import timedelta

delta = timedelta(days=5, hours=3)
print("Delta:", delta)
print("Total seconds in delta:", delta.total_seconds())


Delta: 5 days, 3:00:00
Total seconds in delta: 442800.0


In [15]:
from datetime import datetime, timezone

now = datetime.now(timezone.utc)
print("Current UTC time:", now)

Current UTC time: 2024-10-17 04:22:18.978457+00:00


In [16]:
from datetime import date, time, datetime

d = date(2024, 7, 27)
t = time(15, 30, 45)
dt = datetime(2024, 7, 27, 15, 30, 45)

In [21]:
print(d)
print(t)
print(dt)

2024-07-27
15:30:45
2024-07-27 15:30:45


In [22]:
from datetime import datetime, timedelta

now = datetime.now()
future_date = now + timedelta(days=30)
print("Date 30 days from now:", future_date)

Date 30 days from now: 2024-11-16 09:54:20.790664


In [23]:
from datetime import time

t = time(14, 30)
new_time = t.replace(minute=45)
print("Updated time:", new_time)

Updated time: 14:45:00


In [24]:
new_hours = t.replace(hour=18)

In [25]:
new_hours

datetime.time(18, 30)

In [26]:
from datetime import datetime

now = datetime.now()
weekday = now.weekday()  # Monday is 0, Sunday is 6
week_number = now.isocalendar()[1]  # Week number of the year

print("Weekday (0=Monday, 6=Sunday):", weekday)
print("ISO calendar week number:", week_number)

Weekday (0=Monday, 6=Sunday): 3
ISO calendar week number: 42


In [27]:
weekday = now.weekday() 

In [28]:
weekday

3

In [31]:
week_number = now.isocalendar()

In [32]:
week_number

datetime.IsoCalendarDate(year=2024, week=42, weekday=4)

In [37]:
week_number = now.isocalendar()[0]

In [38]:
week_number

2024

In [39]:
import re

In [40]:
emails = [
    'alice@example.com',
    'bob.smith@company.org',
    'charlie_jones@subdomain.co.uk',
    'david123@another-domain.net'
]

In [41]:
#Extract the domains from these email addresses

# Define the pattern to match email domains

pattern = r'@([\w\.-]+)'

In [42]:
# Extract domains
domains = [re.search(pattern, i).group(1) for i in emails]

In [43]:
domains

['example.com', 'company.org', 'subdomain.co.uk', 'another-domain.net']

In [45]:
#Example 1: Extracting Dates from Text
#Suppose you have a dataset containing logs with dates in various formats, and you need to standardize or extract these dates.
#Here's how you might do that:
    
    
logs = [
    "User login on 2024-07-25 at 14:32",
    "Error reported on 25/07/2024",
    "Update completed on 2024.07.25",
    "Maintenance scheduled for 25-07-2024"
]

In [50]:
#Extract all dates from these logs, regardless of their format.


# Define the pattern to match dates in different formats
pattern = r'\b(\d{4}[-.]\d{2}[-.]\d{2}|\d{2}[-/]\d{2}[-/]\d{4})\b'

In [51]:
# Extract dates
dates = re.findall(pattern, ' '.join(logs))

print(dates)

['2024-07-25', '25/07/2024', '2024.07.25', '25-07-2024']


In [52]:
#Example 2: Extracting Phone Numbers
#You might need to extract phone numbers from a dataset containing contact information.


contacts = [
    "John Doe: +1-800-555-1234",
    "Jane Smith: (555) 678-9012",
    "Alice Johnson: 555.234.5678",
    "Bob Brown: 555-3456"
]

In [53]:
#Extract phone numbers in various formats.


# Define the pattern to match phone numbers in different formats
pattern = r'\+?\d{1,2}[-.\s]?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}'

In [54]:
# Extract phone numbers
phone_numbers = re.findall(pattern, ' '.join(contacts))

print(phone_numbers)

['800-', '']


In [55]:
contacts = [
    "John Doe: +1-800-555-1234",
    "Jane Smith: (555) 678-9012",
    "Alice Johnson: 555.234.5678",
    "Bob Brown: 555-3456"
]

# Define the pattern to match phone numbers in different formats
pattern = r'\+?\d{0,2}[-.\s]?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}'

# Extract phone numbers using the pattern
phone_numbers = []
for contact in contacts:
    match = re.search(pattern, contact)
    if match:
        phone_numbers.append(match.group())

print(phone_numbers)

['+1-800-555-1234', ' (555) 678-9012', ' 555.234.5678', ' 555-3456']


In [None]:
Explanation of the Pattern:
\+?: Matches an optional "+" sign.
\d{0,2}: Matches up to 2 digits for country code (optional).
[-.\s]?: Matches an optional separator (dash, dot, or space).
(\(?\d{3}\)?[-.\s]?)?: Matches an optional area code in parentheses or not, followed by an optional separator.
\d{3}: Matches the first 3 digits of the phone number.
[-.\s]?: Matches an optional separator.
\d{4}: Matches the last 4 digits of the phone number.

In [56]:
#Example 3: Cleaning Up Text Data
#Suppose you have a dataset with noisy text data and you need to remove all non-alphanumeric characters except spaces.



text_data = [
    "Hello! This is a test.",
    "Here, we have: numbers 1234 and symbols @$%^&*!",
    "Clean this text: remove #hashtags and @mentions!"
]


In [57]:
#Remove all non-alphanumeric characters except spaces.


# Define the pattern to remove non-alphanumeric characters except spaces
pattern = r'[^\w\s]'

In [58]:
# Clean text
cleaned_texts = [re.sub(pattern, '', text) for text in text_data]

In [59]:
cleaned_texts

['Hello This is a test',
 'Here we have numbers 1234 and symbols ',
 'Clean this text remove hashtags and mentions']

In [None]:
The regex pattern r'[^\w\s]' can be broken down as follows:

[]: This denotes a character class, which matches any single character within the brackets.

^: When used at the beginning of a character class (right after the opening bracket), it negates the class. This means that the pattern will match any character that is not in the specified set.

\w: This is a shorthand character class that matches any "word" character, which includes:

Uppercase letters (A-Z)
Lowercase letters (a-z)
Digits (0-9)
Underscore (_)
\s: This matches any whitespace character, including spaces, tabs, and newline characters.

In [60]:
#Example 1: Validating Email Addresses
#You might need to validate email addresses to ensure they follow a standard format.


emails = [
    "valid.email@example.com",
    "invalid-email@.com",
    "another.valid.email@domain.co",
    "yet.another@domain",
    "no_at_sign_domain.com"
]

In [62]:
#Identify which email addresses are valid.


# Define the pattern for a basic email validation
pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'

In [63]:
# Check email validity
valid_emails = [email for email in emails if re.match(pattern, email)]

print(valid_emails)

['valid.email@example.com', 'another.valid.email@domain.co']


In [65]:
import statistics as st

In [69]:
x = [2,4,6,8]
y = [1,4,3,2]

In [70]:
st.correlation(x,y)

0.2

In [71]:
st.mean(x)

5

In [72]:
st.median(x)

5.0

In [73]:
import scipy

In [75]:
scipy.stats.zscore(x)

array([-1.34164079, -0.4472136 ,  0.4472136 ,  1.34164079])

In [76]:
X= [1,2,3,4,5,6,7,8,9]

In [77]:
scipy.stats.zscore(X)

array([-1.54919334, -1.161895  , -0.77459667, -0.38729833,  0.        ,
        0.38729833,  0.77459667,  1.161895  ,  1.54919334])