In [12]:
#Importing the necessary modules

import findspark
from pyspark import SparkContext
from pyspark.sql import SparkSession, Window, Row
from pyspark.sql.functions import *
from pyspark.sql.types import *
import matplotlib.pyplot as plt


In [21]:
# Build (or reuse, if one is already running) the SparkSession for this notebook.
# getOrCreate() returns the existing session when there is one, so re-running
# this cell does not start a second session.
session_builder = SparkSession.builder.appName("testingApplication")
spark = session_builder.getOrCreate()

In [26]:
#Basic Functions

def load_df(filename):
    """Read a CSV file into a Spark DataFrame, treating its first line as the header.

    Uses the notebook-level ``spark`` session, which must already exist.
    Only the ``header`` option is set; every other CSV reader option keeps
    Spark's default.

    Args:
        filename: Path of the CSV file to load.

    Returns:
        The DataFrame returned by Spark's CSV reader for ``filename``.
    """
    csv_reader = spark.read.format('csv').options(header="true")
    return csv_reader.load(filename)

In [31]:
# Load the scholarships CSV into a Spark DataFrame.
df = load_df("datasets/scholarships.csv")

# Preview only the first five rows.
preview_df = df.limit(5)
preview_df.show()

+---+--------------------+--------------------+----------------+-----------------+-------------+
|_c0|               title|             degrees|           funds|             date|     location|
+---+--------------------+--------------------+----------------+-----------------+-------------+
|  0|Schaefer Halleen’...|Master, Bachelor,...|           $1000|     30 June 2022|united-states|
|  1|President’s Schol...|Master, Bachelor,...|           $2500|     31 July 2022|united-states|
|  2|International Stu...|Master, Bachelor,...|    Up to $2,000| 22 December 2022|united-states|
|  3|Master in Busines...|              Master|80% Tuition Fees|15 September 2022|united-states|
|  4|Improve Men’s Hea...|Master, Bachelor,...|           $2000|      31 May 2022|united-states|
+---+--------------------+--------------------+----------------+-----------------+-------------+



In [47]:
# Register the DataFrame as a temporary SQL view named "scholarships" so that it
# can be queried through spark.sql().
df.createOrReplaceTempView("scholarships")

# Query all scholarships whose location is anything other than the United States.
# Note: a `!=` comparison is never true for NULL, so rows with a NULL location
# are not returned.
non_us_query = "SELECT * FROM scholarships WHERE scholarships.location != 'united-states'"
sqlDF = spark.sql(non_us_query)
sqlDF.show()

+---+--------------------+--------------------+--------------------+-----------------+--------------+
|_c0|               title|             degrees|               funds|             date|      location|
+---+--------------------+--------------------+--------------------+-----------------+--------------+
| 15|GREAT Scholarship...|              Master|             £10,000|      31 May 2022|united-kingdom|
| 16|Diversity Scholar...|              Master|     50% scholarship|      31 May 2022|united-kingdom|
| 17|Beth Mead Scholar...|    Bachelor, Master|              £1,200|   31 August 2022|united-kingdom|
| 18|Malaysia Scholars...|              Master|              £4,000|     29 June 2022|united-kingdom|
| 19|PhD Studentships ...|              Master|£16,062 p.a., tui...|     10 June 2022|united-kingdom|
| 20|Strathclyde Busin...|              Master|        Tuition fees|             null|united-kingdom|
| 21|International Stu...|Master, Bachelor,...|        Up to $2,000| 22 December 2

In [50]:
# Show the scholarships located in Canada whose degrees value is exactly 'Phd',
# i.e. those open to PhD students only.
canada_phd_query = "SELECT * FROM scholarships WHERE scholarships.location = 'canada' AND scholarships.degrees = 'Phd'"
sqlDF = spark.sql(canada_phd_query)
sqlDF.show()

+---+--------------------+-------+--------------------+-------------+--------+
|_c0|               title|degrees|               funds|         date|location|
+---+--------------------+-------+--------------------+-------------+--------+
|161|P.E.O. Internatio...|    Phd|             $20,000|         null|  canada|
|270|Silver Anniversar...|    Phd|             $20,000|         null|  canada|
|392|International Pos...|    Phd|$55,000 p.a. + be...|         null|  canada|
|400|University of Ott...|    Phd|         $9,000 p.a.|         null|  canada|
|499|Fully-Funded PhD ...|    Phd|             £15,009|         null|  canada|
|505|IPRA Foundation P...|    Phd|        Fully Funded|Always Active|  canada|
|506|Centre for Studie...|    Phd|    Partially Funded|Always Active|  canada|
|509|European Molecula...|    Phd|        Fully Funded|Always Active|  canada|
|510|John Dillon Fello...|    Phd|        Fully Funded|Always Active|  canada|
|511|Konrad von Moltke...|    Phd|        Fully Fund

In [62]:
# Show the scholarships located in Europe that PhD students can apply for.
# The LIKE '%Phd%' pattern matches any degrees value containing 'Phd', so it
# returns scholarships shared with other degree levels as well as Phd-only ones.
europe_phd_query = "SELECT * FROM scholarships WHERE scholarships.location = 'europe' AND scholarships.degrees LIKE '%Phd%'"
sqlDF = spark.sql(europe_phd_query)
sqlDF.show()

+---+--------------------+--------------------+--------------------+----------------+--------+
|_c0|               title|             degrees|               funds|            date|location|
+---+--------------------+--------------------+--------------------+----------------+--------+
| 45|International Stu...|Master, Bachelor,...|        Up to $2,000|22 December 2022|  europe|
| 50|Scholarships for ...|Master, Bachelor,...|       $100 to $3000|   24 March 2027|  europe|
| 51|LAPTOP Scholarshi...|Master, Bachelor,...|         up to $3000|      6 May 2022|  europe|
| 53|Five scholarships...|Master, Bachelor,...|               $3000|      6 May 2022|  europe|
|167|Eon Essay Contest...|Master, Bachelor,...|15,000 USD top pr...|    15 June 2022|  europe|
|168| Anonymous Hope Fund|Master, Bachelor,...|               $3000| 15 January 2026|  europe|
|171|The Annual IELTS ...|Master, Bachelor,...|              $1,000|    1 April 2023|  europe|
|286|FindDataLab Resea...|Bachelor, Master,...|Up 