# Smoothing Land-Ocean Temperature Data

This workflow **loads a data file** with global temperature data and **smooths it with several different algorithms**. At the end, you can **select a particular year range** and **create a new data file** containing the smoothed data for that range.

In [None]:
import pandas as pd
import os

## Load data from file

In [None]:
# Location of the input CSV, given relative to the current working directory
DATA_DIR = 'data'
DATA_FILE = 'land-ocean-temp-index.csv'

# Read the CSV into a DataFrame.
# NOTE(review): '#' is passed as the escape character, presumably so that a
# '#' prefixed to the header line is dropped -- confirm against the data file.
df = pd.read_csv(
    os.path.join(DATA_DIR, DATA_FILE),
    escapechar='#',
)
df

## Plot original data

In [None]:
import matplotlib.pyplot as plt

# Draw the raw temperature series against the year, then label both axes.
plt.plot(df['Year'], df['Temperature'])
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.show()

## Add column for Savitzky-Golay filter

In [None]:
from scipy.signal import savgol_filter

# Filter parameters: number of samples in each fitting window, and the degree
# of the polynomial fitted inside that window.
window_size = 7
poly_order = 5

# Store the smoothed temperatures in a new column next to the raw values.
df['Savitzky-Golay'] = savgol_filter(
    df['Temperature'],
    window_length=window_size,
    polyorder=poly_order,
)
df

In [None]:
# Overlay the smoothed series on the raw data to show the effect of the filter.
# Each curve carries a label so the legend can tell the two lines apart.
plt.plot(df['Year'], df['Temperature'], label='Original')
plt.plot(df['Year'], df['Savitzky-Golay'], label='Savitzky-Golay')
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.legend()
plt.show()

## Select a range of data

In [None]:
# First and last year of the range to extract (the selection below includes both ends).
from_year, to_year = 1920, 1980

In [None]:
# Keep only the rows whose Year lies within [from_year, to_year], both ends included.
selected_range = df[df['Year'].between(from_year, to_year)]

## Plot selected data with smoothed curve

In [None]:
# Plot the raw and smoothed series for the selected years only.
# Each curve carries a label so the legend can tell the two lines apart.
plt.plot(selected_range['Year'], selected_range['Temperature'], label='Original')
plt.plot(selected_range['Year'], selected_range['Savitzky-Golay'], label='Savitzky-Golay')
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.legend()
plt.show()

## Save selected data to file

In [None]:
# Write the selected rows (raw and smoothed columns) next to the input file.
# The path is built from DATA_DIR so input and output stay in the same
# directory if that setting changes; the DataFrame index is not written.
selected_range.to_csv(os.path.join(DATA_DIR, 'output.csv'), index=False)