In [None]:
# Module 17 - Pandas Exercises
# This cell imports pandas and appends './data' to the module search path.
# NOTE: sys.path only influences where `import` looks for Python modules; it
# does not change where pd.read_csv() resolves relative file names (those are
# resolved against the current working directory).

import pandas as pd
import sys
sys.path.append('./data')

In [None]:
# Load the hacker_news.csv file into a pandas DataFrame.
#
# pd.read_csv resolves relative paths against the current working directory
# (entries on sys.path are not searched), so the data folder has to be named
# explicitly. The working directory is tried first, then ./data.
CSV_CANDIDATES = ('hacker_news.csv', './data/hacker_news.csv')

hacker_news_df = None  # stays None on failure so that later cells are skipped
for csv_path in CSV_CANDIDATES:
    try:
        hacker_news_df = pd.read_csv(csv_path)
    except FileNotFoundError:
        continue  # not at this location; try the next candidate
    except Exception as e:
        # Any other failure (e.g. a parse error): report it and stop searching.
        print(f"Error loading the CSV: {e}")
        break
    else:
        print("The csv file has been loaded.")
        break
else:
    # Reached only when none of the candidate locations contained the file.
    print("Error: File 'hacker_news.csv' not found in the current directory or the data folder. Please check and try again.")

In [None]:
# Preview the top of the DataFrame (its first five rows).
# Nothing is printed when the CSV could not be loaded (hacker_news_df is None).
if hacker_news_df is not None:
    print("First Five Rows:", hacker_news_df.head(n=5), sep="\n")

In [None]:
# Preview the bottom of the DataFrame (its last five rows).
# Nothing is printed when the CSV could not be loaded (hacker_news_df is None).
if hacker_news_df is not None:
    print("\nLast Five Rows:", hacker_news_df.tail(n=5), sep="\n")

In [None]:
# Pull the 'title' column out of the DataFrame as a pandas Series.
if hacker_news_df is not None:
    title_series = hacker_news_df['title']
    # Only the head of the Series is printed to keep the output short.
    print("\nTitle Column as Pandas Series:", title_series.head(), sep="\n")

In [None]:
# Report the dimensions of the DataFrame.
if hacker_news_df is not None:
    # shape is a (row count, column count) tuple, so unpack both at once
    num_rows, num_cols = hacker_news_df.shape
    print(f"\nNumber of Rows: {num_rows}")
    print(f"Number of Columns: {num_cols}")

In [None]:
# Keep the rows whose title mentions "python".
# The match is case-insensitive and rows with a missing title count as non-matches.
if hacker_news_df is not None:
    python_titles = hacker_news_df.loc[
        lambda frame: frame['title'].str.contains('python', case=False, na=False)
    ]
    print("\nTitles Containing 'python':")
    # Preview just the title and url columns of the first few matches
    print(python_titles[['title', 'url']].head())

In [None]:
# Keep the rows whose title mentions "JavaScript".
# The match is case-insensitive and rows with a missing title count as non-matches.
if hacker_news_df is not None:
    javascript_titles = hacker_news_df.loc[
        lambda frame: frame['title'].str.contains('JavaScript', case=False, na=False)
    ]
    print("\nTitles Containing 'JavaScript':")
    # Preview just the title and url columns of the first few matches
    print(javascript_titles[['title', 'url']].head())

In [None]:
# Explore the dataset: schema, summary statistics and value frequencies.
# The whole cell is skipped when the CSV could not be loaded.
if hacker_news_df is not None:
    print("\nData Exploration:", "\nDataFrame info:", sep="\n")
    # info() writes its report (column names, dtypes) straight to stdout
    hacker_news_df.info()

    # Summary statistics as produced by describe()
    print("\nDescriptive Statistics (numerical columns):")
    print(hacker_news_df.describe())

    # How often each distinct value occurs in the 'type' column
    print("\nValue counts for type column:")
    print(hacker_news_df.loc[:, 'type'].value_counts())

    # Same for 'author', limited to the first ten entries of the counts
    print("\nValue counts for author column:")
    print(hacker_news_df.loc[:, 'author'].value_counts().iloc[:10])

    # A few rows again, for reference next to the statistics above
    print("\nExample of few rows:", hacker_news_df.head(), sep="\n")