In [None]:


library(tidyverse) # metapackage of all tidyverse packages

# Kaggle exposes the input data under the read-only "../input/" directory.
# Listing that directory shows which datasets are attached to this notebook.

list.files("../input")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load data & preprocess ----
# Read the World Happiness Report 2021 CSV, keep only the columns used in the
# analysis, and give them shorter names in the same step. `select()` renames
# with `new = old` and keeps the columns in the order listed.
df21 <- read.csv(
  "../input/world-happiness-2021-report/world-happiness-report-2021.csv"
) %>%
  select(
    Country = Country.name,
    Region = Regional.indicator,
    Happiness_score = Ladder.score,
    GDP = Logged.GDP.per.capita,
    Social_support = Social.support,
    Life_expectancy = Healthy.life.expectancy,
    Freedom = Freedom.to.make.life.choices,
    Generosity,
    Corruption = Perceptions.of.corruption,
    Dystopia_residual = Dystopia...residual
  )

# Display the cleaned data frame.
df21

In [None]:
# Show the column names of the cleaned data frame.
names(df21)

In [None]:
library(ggplot2)
library(dplyr)
library(corrplot)
library(caTools)
library(ggpubr)
library(forcats)


In [None]:
# Print a compact summary of df21's structure.
# Other inspection calls, kept disabled: attributes(df21)
str(df21)
# Other inspection calls, kept disabled: colnames(df21)
# Other inspection calls, kept disabled: df21

In [None]:
# Top and bottom ten happiest countries ----

# Top ten: sort by happiness score (highest first), keep the first ten rows,
# and draw one horizontal bar per country. `reorder()` orders the y axis by
# score, and each bar is labelled with its score near the bar end.
df21 %>%
  arrange(desc(Happiness_score)) %>%
  head(10) %>%
  ggplot(aes(
    x = Happiness_score,
    y = reorder(Country, Happiness_score),
    fill = Country
  )) +
  geom_point(aes(color = Region)) +
  geom_col() +
  labs(
    title = "Top Ten Happiest Countries",
    x = "Happiness Score",
    y = "Countries"
  ) +
  scale_fill_brewer(palette = "Set3") +
  geom_text(
    aes(label = Happiness_score),
    position = position_stack(vjust = 0.9),
    color = "black",
    size = 3
  )

# Finland ranked as the happiest country in 2021, out of 149 countries, with a
# happiness score of 7.842.

In [None]:
# Bottom ten happiest countries ----

# Sort by happiness score (lowest first), keep the first ten rows, and draw
# one horizontal bar per country. `reorder()` orders the y axis by score, and
# each bar is labelled with its score near the bar end.
df21 %>%
  arrange(Happiness_score) %>%
  head(10) %>%
  ggplot(aes(
    x = Happiness_score,
    y = reorder(Country, Happiness_score),
    fill = Country
  )) +
  geom_point(aes(color = Region)) +
  geom_col() +
  labs(
    title = "Bottom Ten Happiest Countries",
    x = "Happiness Score",
    y = "Countries"
  ) +
  scale_fill_brewer(palette = "Set3") +
  geom_text(
    aes(label = Happiness_score),
    position = position_stack(vjust = 0.9),
    color = "black",
    size = 3
  )

# Afghanistan ranked last in 2021, with a happiness score of 2.523.

In [None]:
# Look up the rows for India and Pakistan. `which()` drops any NA comparison
# results, and `[` keeps the original row names.
df21[which(df21$Country == "India"), ]
df21[which(df21$Country == "Pakistan"), ]


In [None]:
# Happiness score distribution across regions ----

# Mean happiness score per region. `aggregate()` names the grouping column
# `Group.1` and the aggregated value column `x`.
data_means <- aggregate(df21$Happiness_score, list(df21$Region), mean)

# One horizontal bar per region, ordered by mean score. The labels are
# formatted to three decimals for display only; the raw means would otherwise
# be printed with every available digit. `data_means` itself is not rounded.
data_means %>%
  arrange(desc(x)) %>%
  ggplot(aes(x = x, y = reorder(Group.1, x), fill = Group.1)) +
  geom_col() +
  labs(title = "Regionwise Happiness", x = "Happiness Score", y = "Region") +
  scale_fill_brewer(palette = "Set3") +
  geom_text(
    aes(label = sprintf("%.3f", x)),
    position = position_stack(vjust = 0.5),
    color = "black",
    size = 3
  )

In [None]:
# Box plot of happiness score per region. The `region` factor levels are
# ordered by each region's median score, which sets the order of the boxes
# and of the fill legend.
df21 %>%
  mutate(region = fct_reorder(Region, Happiness_score, .fun = median)) %>%
  ggplot(aes(x = Happiness_score)) +
  geom_boxplot(aes(fill = region)) +
  coord_flip() +
  labs(x = "Happiness Score", y = "Region")

In [None]:
# Effect of GDP per capita and perceived corruption ----

# Scatter plot of happiness score (x) against the GDP column (y), one point
# per country, coloured by region.
ggplot(df21, aes(x = Happiness_score, y = GDP)) +
  geom_point(aes(color = Region)) +
  labs(x = "Happiness Score", y = "GDP per Capita")

# Observation: happiness score increases with GDP per capita.

In [None]:
# Scatter plot of happiness score (x) against perceived corruption (y), one
# point per country, coloured by region.
ggplot(df21, aes(x = Happiness_score, y = Corruption)) +
  geom_point(aes(color = Region)) +
  labs(x = "Happiness Score", y = "Corruption Perception")

# Observation: perceived corruption is low in regions with a high happiness
# score.

In [None]:
## Pearson correlation matrix ----

# Keep only the numeric score and factor columns for the correlation analysis.
cdf <- df21 %>%
  select(
    Happiness_score,
    GDP,
    Social_support,
    Life_expectancy,
    Freedom,
    Generosity,
    Corruption,
    Dystopia_residual
  )

# Compute the correlation matrix once (cor() defaults to Pearson), then show
# it as a table and as a lower-triangle colour plot.
cor_matrix <- cor(cdf)
data.frame(cor_matrix)
corrplot(cor_matrix, method = "color", type = "lower")

## Values close to 1 indicate a strong positive linear relationship.

In [None]:
# Multiple linear regression ----

# A linear regression model will be used to predict the happiness score.
# First step: partition the data set into training and test sets.

# Fixed seed so the random split is reproducible.
set.seed(9)

# `split` is a logical vector with one entry per row of `cdf`: TRUE rows go to
# the training set, FALSE rows to the test set (SplitRatio = 0.7).
split <- sample.split(cdf$Happiness_score, SplitRatio = 0.7)
training_set <- cdf[split, ]
test_set <- cdf[!split, ]

