In [1]:
# Author: Andres Melendez
# Date: 2024-10-14
# Description: Generates a 3x4 NumPy array after seeding the random generator, saves it as np.csv,
# reads the CSV as a DataFrame, and writes it to another CSV file.

import numpy as np
import pandas as pd

def generate_array(seed: int, shape: tuple) -> np.ndarray:
    """
    Generates a random NumPy array with the given seed and shape.
    
    :param seed: The random seed to ensure reproducibility.
    :param shape: The desired shape of the array.
    :return: A NumPy array with random values.
    """
    np.random.seed(seed)
    return np.random.rand(*shape)


In [2]:
def save_array_to_csv(array: np.ndarray, filename: str) -> None:
    """
    Saves the NumPy array to a CSV file.
    
    :param array: The NumPy array to save.
    :param filename: The name of the file to save the array in.
    """
    np.savetxt(filename, array, delimiter=',')


In [3]:
def read_csv_to_dataframe(filename: str) -> pd.DataFrame:
    """
    Reads a CSV file and converts it to a Pandas DataFrame.
    
    :param filename: The name of the CSV file to read.
    :return: A Pandas DataFrame containing the CSV data.
    """
    return pd.read_csv(filename, header=None)



In [4]:
def write_dataframe_to_csv(dataframe: pd.DataFrame, filename: str) -> None:
    """
    Writes a Pandas DataFrame to a CSV file.
    
    :param dataframe: The DataFrame to write.
    :param filename: The name of the file to save the DataFrame in.
    """
    dataframe.to_csv(filename, index=False)



In [5]:
# Main logic
try:
    # Generate a 3x4 NumPy array with a random seed of 42
    array = generate_array(seed=42, shape=(3, 4))

    # Save the array to np.csv
    save_array_to_csv(array, 'np.csv')

    # Read the CSV file into a DataFrame
    df = read_csv_to_dataframe('np.csv')

    # Print the DataFrame to verify the contents
    print(df)

    # Write the DataFrame to a new CSV file
    write_dataframe_to_csv(df, 'df_output.csv')

except Exception as e:
    # Handle any exceptions during file operations or array manipulations
    print(f"An error occurred: {e}")
finally:
    print("Process completed.")


          0         1         2         3
0  0.374540  0.950714  0.731994  0.598658
1  0.156019  0.155995  0.058084  0.866176
2  0.601115  0.708073  0.020584  0.969910
Process completed.


In [6]:
# Description: Generates a 365x4 NumPy array with random values, stores it in CSV and NumPy formats, 
# checks the file sizes and shapes, and works with a Pandas DataFrame to save and retrieve the data using pickle.

import os
import pickle

def generate_array(shape: tuple) -> np.ndarray:
    """
    Generates a random NumPy array with the specified shape.
    
    :param shape: The desired shape of the array.
    :return: A NumPy array with random values.
    """
    return np.random.rand(*shape)


In [7]:
def get_file_size(filename: str) -> int:
    """
    Returns the size of the file in bytes.
    
    :param filename: The name of the file to check.
    :return: The size of the file in bytes.
    """
    return os.path.getsize(filename)


In [8]:
def save_array_numpy_format(array: np.ndarray, filename: str) -> None:
    """
    Saves the NumPy array in .npy format.
    
    :param array: The NumPy array to save.
    :param filename: The name of the file to save the array in.
    """
    np.save(filename, array)


In [9]:
def load_array_numpy_format(filename: str) -> np.ndarray:
    """
    Loads a NumPy array from a .npy file.
    
    :param filename: The name of the file to load the array from.
    :return: The loaded NumPy array.
    """
    return np.load(filename)


In [10]:
def save_dataframe_to_pickle(dataframe: pd.DataFrame, filename: str) -> None:
    """
    Saves a Pandas DataFrame to a pickle file.
    
    :param dataframe: The DataFrame to save.
    :param filename: The name of the pickle file.
    """
    with open(filename, 'wb') as f:
        pickle.dump(dataframe, f)


In [11]:
def load_dataframe_from_pickle(filename: str) -> pd.DataFrame:
    """
    Loads a Pandas DataFrame from a pickle file.
    
    :param filename: The name of the pickle file.
    :return: The loaded DataFrame.
    """
    with open(filename, 'rb') as f:
        return pickle.load(f)


In [12]:
# Main logic
try:
    # Generate a 365x4 NumPy array with random values
    array = generate_array(shape=(365, 4))

    # Save the array to a CSV file and check its size
    csv_filename = 'np_array.csv'
    save_array_to_csv(array, csv_filename)
    csv_size = get_file_size(csv_filename)
    print(f"CSV file size: {csv_size} bytes")

    # Save the array in NumPy format and check its shape and file size
    npy_filename = 'np_array.npy'
    save_array_numpy_format(array, npy_filename)
    loaded_array = load_array_numpy_format(npy_filename)
    npy_size = get_file_size(npy_filename)
    print(f"NumPy file size: {npy_size} bytes")
    print(f"Loaded NumPy array shape: {loaded_array.shape}")

    # Create a DataFrame from the array, save it to a pickle file, and retrieve it
    df = pd.DataFrame(array, columns=['Column1', 'Column2', 'Column3', 'Column4'])
    pickle_filename = 'df_pickle.pkl'
    save_dataframe_to_pickle(df, pickle_filename)

    # Load the DataFrame from the pickle file and print its size
    loaded_df = load_dataframe_from_pickle(pickle_filename)
    pickle_size = get_file_size(pickle_filename)
    print(f"Pickle file size: {pickle_size} bytes")

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    print("Process completed.")

CSV file size: 36865 bytes
NumPy file size: 11808 bytes
Loaded NumPy array shape: (365, 4)
Pickle file size: 12375 bytes
Process completed.


In [13]:
# Description: Saves a NumPy array to an Excel file, reads it back into a DataFrame, and prints the results.

def save_array_to_excel(array: np.ndarray, filename: str) -> None:
    """
    Saves the NumPy array to an Excel file.
    
    :param array: The NumPy array to save.
    :param filename: The name of the Excel file to save the array in.
    """
    df = pd.DataFrame(array, columns=['Column1', 'Column2', 'Column3', 'Column4'])
    df.to_excel(filename, index=False)


In [14]:
def read_excel_to_dataframe(filename: str) -> pd.DataFrame:
    """
    Reads an Excel file and converts it to a Pandas DataFrame.
    
    :param filename: The name of the Excel file to read.
    :return: A Pandas DataFrame containing the Excel data.
    """
    return pd.read_excel(filename)


In [15]:
# Main logic
try:
    # Generate the same 365x4 NumPy array (from previous steps)
    array = np.random.rand(365, 4)

    # Save the array to an Excel file
    excel_filename = 'np_array.xlsx'
    save_array_to_excel(array, excel_filename)
    print(f"Array successfully saved to {excel_filename}")

    # Read the Excel file back into a DataFrame
    df_from_excel = read_excel_to_dataframe(excel_filename)

    # Print the DataFrame to verify the contents
    print("DataFrame from Excel file:")
    print(df_from_excel)

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    print("Process completed.")

Array successfully saved to np_array.xlsx
DataFrame from Excel file:
      Column1   Column2   Column3   Column4
0    0.878516  0.404140  0.327033  0.667593
1    0.807846  0.762285  0.797814  0.435583
2    0.817834  0.120209  0.544489  0.005759
3    0.324586  0.366462  0.396173  0.695467
4    0.388558  0.448694  0.237544  0.373252
..        ...       ...       ...       ...
360  0.729345  0.966845  0.224293  0.663047
361  0.741896  0.848425  0.422629  0.302931
362  0.325295  0.712621  0.816779  0.181614
363  0.370941  0.901940  0.806694  0.984858
364  0.754248  0.393195  0.590638  0.661014

[365 rows x 4 columns]
Process completed.


In [16]:
import json

# Define the JSON string
json_string = '{"country":"Netherlands","dma_code":"0","timezone":"Europe/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'

# Parse the JSON string using json.loads()
data = json.loads(json_string)

# Print the value for the “country” field
print(f"Original country: {data['country']}")

# Overwrite the value for "Netherlands" with a new value (e.g., "Germany")
data['country'] = "Germany"

# Print the updated dictionary
print(f"Updated country: {data['country']}")


Original country: Netherlands
Updated country: Germany


In [17]:
from io import StringIO

# Use StringIO to wrap the JSON string and create a Pandas Series
json_io = StringIO(json_string)
series = pd.read_json(json_io, typ='series')

# Change the country value again (for example, change it to "Belgium")
series['country'] = "Belgium"

# Convert the updated Pandas Series back to a JSON string
updated_json_string = series.to_json()

# Print the updated JSON string
updated_json_string


'{"country":"Belgium","dma_code":"0","timezone":"Europe\\/Amsterdam","area_code":"0","ip":"46.19.37.108","asn":"AS196752","continent_code":"EU","isp":"Tilaa V.O.F.","longitude":5.75,"latitude":52.5,"country_code":"NL","country_code3":"NLD"}'

In [18]:
"""
Title: Exercise 9.2
Author: Armando Fandango
Date: 24 October 2024
Modified By: Andres Melendez
Description:This Python script uses BeautifulSoup to parse an HTML file and extract various
elements like div, dfn, a tags, and list items. It performs tasks such as counting elements
with specific attributes, selecting elements via CSS selectors, and searching for text strings
using regular expressions.
"""

from bs4 import BeautifulSoup
import re

# Parsing the HTML file using BeautifulSoup
soup = BeautifulSoup(open('loremIpsum.html'), "html.parser")

# Printing the first 'div' and its class attribute
print("First div\n", soup.div)
print("First div class", soup.div['class'])

# Printing the text of the first 'dfn' inside 'dl > dt'
print("First dfn text", soup.dl.dt.dfn.text)

# Iterating through all 'a' tags and printing their text and href attribute
for link in soup.find_all('a'):
    print("Link text", link.string, "URL", link.get('href'))

# Enumerating through all 'div' tags and printing their contents
for i, div in enumerate(soup('div')):
    print(i, div.contents)

# Searching for a 'div' with id="official" and printing the third element's content
official_div = soup.find_all("div", id="official")
print("Official Version", official_div[0].contents[2].strip())

# Counting and printing the number of elements with a class attribute
print("# elements with class", len(soup.find_all(class_=True)))

# Finding and counting 'div' tags with class="tile"
tile_class = soup.find_all("div", class_="tile")
print("# Tile classes", len(tile_class))

# Finding and counting 'div' tags with a class name containing "tile"
print("# Divs with class containing tile", len(soup.find_all("div", class_=re.compile("tile"))))

# Using a CSS selector to find and print 'div' elements with class 'notile'
print("Using CSS selector\n", soup.select('div.notile'))

# Selecting the first two list items in an ordered list and printing them
print("Selecting ordered list list items\n", soup.select("ol > li")[:2])

# Printing the second list item in the ordered list using nth-of-type CSS selector
print("Second list item in ordered list", soup.select("ol > li:nth-of-type(2)"))

# Searching for text strings matching "2014" using a regex
print("Searching for text string", soup.find_all(string=re.compile("2014")))


First div
 <div class="tile">
<h4>Development</h4>
     0.10.1 - July 2014<br/>
</div>
First div class ['tile']
First dfn text Quare attende, quaeso.
Link text loripsum.net URL http://loripsum.net/
Link text Poterat autem inpune; URL http://loripsum.net/
Link text Is es profecto tu. URL http://loripsum.net/
0 ['\n', <h4>Development</h4>, '\n     0.10.1 - July 2014', <br/>, '\n']
1 ['\n', <h4>Official Release</h4>, '\n     0.10.0 June 2014', <br/>, '\n']
2 ['\n', <h4>Previous Release</h4>, '\n     0.09.1 June 2013', <br/>, '\n']
Official Version 0.10.0 June 2014
# elements with class 3
# Tile classes 2
# Divs with class containing tile 3
Using CSS selector
 [<div class="notile">
<h4>Previous Release</h4>
     0.09.1 June 2013<br/>
</div>]
Selecting ordered list list items
 [<li>Cur id non ita fit?</li>, <li>In qua si nihil est praeter rationem, sit in una virtute finis bonorum;</li>]
Second list item in ordered list [<li>In qua si nihil est praeter rationem, sit in una virtute finis bon