In [1]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://archive.apache.org/dist/spark/spark-3.2.4/spark-3.2.4-bin-hadoop3.2.tgz
!tar xf spark-3.2.4-bin-hadoop3.2.tgz
!pip install -q findspark

In [2]:
import os
# Locations of the Java 8 JDK and of the unpacked Spark 3.2.4 distribution
os.environ.update(
    {
        "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
        "SPARK_HOME": "/content/spark-3.2.4-bin-hadoop3.2",
    }
)

In [3]:
import findspark
# Initialise findspark with its defaults; presumably this is what makes the
# `pyspark` import in the following cell resolvable — confirm against findspark docs
findspark.init()
# Last expression of the cell: displays the Spark location findspark resolved
findspark.find()

'/content/spark-3.2.4-bin-hadoop3.2'

In [5]:
# Importing libraries
import math
from pyspark import SparkContext

In [25]:
# Obtain a SparkContext (getOrCreate reuses an existing one when available)
sparkCtx = SparkContext.getOrCreate()

# Load the dataset; each element of `data` is one text line of the file,
# which parse_line below splits on tab characters
file_path = "/content/DollarDataset.txt"
data = sparkCtx.textFile(file_path)

# Filter out lines that do not have the expected format and convert the third column to float
def parse_line(line):
    """Turn one tab-separated text line into a ``(date, value)`` tuple.

    The second field is returned unchanged as the date string. The third
    field is converted to ``float`` after replacing a decimal comma with a
    decimal point.

    Args:
        line: A single line of text.

    Returns:
        ``(date, value)`` on success, or ``None`` when the line has fewer
        than three tab-separated fields or the third field is not a number.
    """
    fields = line.split("\t")

    # Both the date (index 1) and the value (index 2) must be present
    if len(fields) < 3:
        return None

    try:
        rate = float(fields[2].replace(',', '.'))
    except ValueError:
        return None

    return (fields[1], rate)

# Parse every line and drop the ones parse_line rejected (it returns None for them)
structured_data = data.map(parse_line).filter(lambda x: x is not None)

# Collect the data as a list on the driver. Consecutive rows are compared
# pairwise below, which assumes the file is in chronological order — TODO confirm
collected_data = structured_data.collect()

percentage_increases = []

# Walk over each (previous row, current row) pair and compute the percentage change
for prev_day, current_day in zip(collected_data, collected_data[1:]):
    prev_value = prev_day[1]
    if prev_value == 0:
        # A zero baseline has no defined percentage change; skip the pair
        # instead of raising ZeroDivisionError and aborting the whole cell
        continue
    increase = 100 * (current_day[1] - prev_value) / prev_value
    if not math.isfinite(increase):
        # float() accepts "nan"/"inf"; such values would make the ranking
        # below unreliable, so they are left out
        continue
    percentage_increases.append(((current_day[0], prev_day[0]), increase))

# Sort to get top 5 increases (largest first)
sorted_increases = sorted(percentage_increases, key=lambda x: x[1], reverse=True)[:5]

for record in sorted_increases:
    print(f"Date: {record[0][0]} Previous Date: {record[0][1]} Percentage Increase: {record[1]:.2f}%")

# Stop the SparkContext
sparkCtx.stop()


Date: 22-08-1960 Previous Date: 19-08-1960 Percentage Increase: 221.43%
Date: 25-01-1980 Previous Date: 24-01-1980 Percentage Increase: 100.00%
Date: 10-08-1970 Previous Date: 07-08-1970 Percentage Increase: 65.00%
Date: 12-06-1979 Previous Date: 11-06-1979 Percentage Increase: 32.08%
Date: 01-03-1978 Previous Date: 28-02-1978 Percentage Increase: 29.87%
