In [2]:
### 2 - Loading dataset
import pandas as pd

# Read the raw CSV; the path is relative to the notebook's working directory.
url = r'sunspots.csv'
df = pd.read_csv(url)

# Preview the raw table and list every column it contains.
print("Original dataset:", df.head(), sep="\n")
print(f"\nColumn names: {df.columns.tolist()}")

# Keep only the two columns used from here on; .copy() detaches the subset
# from `df` so later assignments never touch (or warn about) the raw frame.
# NOTE(review): the preview shows Sunspots == -1 on rows where Observations == 0,
# presumably a missing-value sentinel; confirm before aggregating these values.
sunspots = df[['Date', 'Sunspots']].copy()
print("\nSunspots DataFrame:", sunspots.head(), sep="\n")

# Show the dtypes exactly as they were read from the CSV.
print("\nData types before conversion:", sunspots.dtypes, sep="\n")

# Date parsing: pd.to_datetime with format='mixed' infers the format of each
# element individually; the converted column replaces 'Date' in place of the
# original strings (column order is unchanged).
sunspots = sunspots.assign(Date=pd.to_datetime(sunspots['Date'], format='mixed'))
print("\nAfter date conversion:", sunspots.head(), sep="\n")
print("\nData types after conversion:", sunspots.dtypes, sep="\n")



Original dataset:
   Unnamed: 0        Date  Year  DayOfYear  DayOfWeek  Week  Sunspots  Std  \
0           0  1818-01-01  1818          1          3     1        -1 -1.0   
1           1  1818-01-02  1818          2          4     1        -1 -1.0   
2           2  1818-01-03  1818          3          5     1        -1 -1.0   
3           3  1818-01-04  1818          4          6     1        -1 -1.0   
4           4  1818-01-05  1818          5          0     2        -1 -1.0   

   Observations  Provisional  
0             0            1  
1             0            1  
2             0            1  
3             0            1  
4             0            1  

Column names: ['Unnamed: 0', 'Date', 'Year', 'DayOfYear', 'DayOfWeek', 'Week', 'Sunspots', 'Std', 'Observations', 'Provisional']

Sunspots DataFrame:
         Date  Sunspots
0  1818-01-01        -1
1  1818-01-02        -1
2  1818-01-03        -1
3  1818-01-04        -1
4  1818-01-05        -1

Data types before conversion:
D

In [3]:
### 3 - Handling Time Series Data:

##### 3.1 - Select a specific date range for the year 1990 using pd.date_range and set the 'Date' column as the index using set_index.

# Generate a daily range of dates from January 1, 1990 to January 10, 1990
# (both endpoints are included by pd.date_range).
date_range = pd.date_range(start='1990-01-01', end='1990-01-10')
print(f"\nGenerated date range:\n{date_range}")

# Show the current columns so it is visible whether 'Date' is still a column.
print(f"\nColumns in sunspots DataFrame: {sunspots.columns.tolist()}")

# Make 'Date' the index. The step is guarded so the cell can be re-run safely:
# after the first run 'Date' is already the index (no longer a column), and an
# unconditional set_index('Date') would raise KeyError on the second run.
# Rebinding the name instead of using inplace=True keeps the step explicit.
if 'Date' in sunspots.columns:
    sunspots = sunspots.set_index('Date')
elif sunspots.index.name != 'Date':
    # Neither a column nor the index: fail loudly with the same exception type
    # set_index itself would have raised.
    raise KeyError("'Date' is neither a column nor the index of sunspots")
print("\nAfter setting Date as index:")
print(sunspots.head())


Generated date range:
DatetimeIndex(['1990-01-01', '1990-01-02', '1990-01-03', '1990-01-04',
               '1990-01-05', '1990-01-06', '1990-01-07', '1990-01-08',
               '1990-01-09', '1990-01-10'],
              dtype='datetime64[ns]', freq='D')

Columns in sunspots DataFrame: ['Date', 'Sunspots']

After setting Date as index:
            Sunspots
Date                
1818-01-01        -1
1818-01-02        -1
1818-01-03        -1
1818-01-04        -1
1818-01-05        -1
