<h1> Data Reshaping in R </h1>


<h3> Transpose of a Matrix </h3>


In [None]:
# Build a 3 x 4 matrix holding 1..12, filled one row at a time
matrix_data <- matrix(seq_len(12), nrow = 3, ncol = 4, byrow = TRUE)
print("Original Matrix:")
print(matrix_data)

# t() swaps rows and columns, so the result is 4 x 3
transposed_matrix <- t(matrix_data)
print("Transposed Matrix:")
print(transposed_matrix)

[1] "Original Matrix:"
     [,1] [,2] [,3] [,4]
[1,]    1    2    3    4
[2,]    5    6    7    8
[3,]    9   10   11   12
[1] "Transposed Matrix:"
     [,1] [,2] [,3]
[1,]    1    5    9
[2,]    2    6   10
[3,]    3    7   11
[4,]    4    8   12


<h3> Joining Rows and Columns </h3>


In [None]:
# Using cbind() to combine by columns
name <- c("Alice", "Bob", "Charlie")
age <- c(25, 30, 35)
city <- c("New York", "Los Angeles", "Chicago")

# cbind() on bare vectors of mixed types builds a character matrix, which
# silently turns age into strings. Starting from a data frame makes cbind()
# dispatch to its data-frame method, so every column keeps its own type.
data <- cbind(data.frame(name = name), age = age, city = city)
data

name,age,city
Alice,25,New York
Bob,30,Los Angeles
Charlie,35,Chicago


In [None]:
# Stack two data frames that share the same columns (Name, Age) with rbind()
data1 <- data.frame(
  Name = c("Alice", "Bob"),
  Age = c(25, 30)
)
data2 <- data.frame(
  Name = c("Charlie", "David"),
  Age = c(35, 40)
)

# The rows of data2 are appended below the rows of data1
data <- rbind(data1, data2)
data


Name,Age
<chr>,<dbl>
Alice,25
Bob,30
Charlie,35
David,40


<h3> Merging DataFrame </h3>


In [None]:
# Two data frames keyed by ID; only IDs 2 and 3 occur in both of them
df1 <- data.frame(
  ID = c(1, 2, 3),
  Name = c("Alice", "Bob", "Charlie")
)
df2 <- data.frame(
  ID = c(2, 3, 4),
  Age = c(30, 35, 28)
)

# Display both inputs before merging them
print("Data Frame 1:")
print(df1)
print("Data Frame 2:")
print(df2)

[1] "Data Frame 1:"
  ID    Name
1  1   Alice
2  2     Bob
3  3 Charlie
[1] "Data Frame 2:"
  ID Age
1  2  30
2  3  35
3  4  28


In [None]:
# Inner join: keep only the IDs that are present in both df1 and df2
inner_join_df <- merge(x = df1, y = df2, by = "ID", all = FALSE)
inner_join_df

ID,Name,Age
<dbl>,<chr>,<dbl>
2,Bob,30
3,Charlie,35


In [None]:
# Left join: keep every row of df1; Age is NA where df2 has no matching ID
left_join_df <- merge(x = df1, y = df2, by = "ID", all.x = TRUE)
left_join_df

ID,Name,Age
<dbl>,<chr>,<dbl>
1,Alice,
2,Bob,30.0
3,Charlie,35.0


In [None]:
# Right join: keep every row of df2; Name is NA where df1 has no matching ID
right_join_df <- merge(x = df1, y = df2, by = "ID", all.y = TRUE)
right_join_df

ID,Name,Age
<dbl>,<chr>,<dbl>
2,Bob,30
3,Charlie,35
4,,28


In [None]:
# Full outer join: keep the rows of both inputs, filling the gaps with NA
outer_join_df <- merge(x = df1, y = df2, by = "ID", all.x = TRUE, all.y = TRUE)
outer_join_df

ID,Name,Age
<dbl>,<chr>,<dbl>
1,Alice,
2,Bob,30.0
3,Charlie,35.0
4,,28.0


<h3> Merging DataFrame using dplyr </h3>

In [None]:
library(dplyr)

# Sample inputs keyed by ID; only IDs 2 and 3 occur in both of them
df1 <- data.frame(
  ID = c(1, 2, 3),
  Name = c("Alice", "Bob", "Charlie")
)
df2 <- data.frame(
  ID = c(2, 3, 4),
  Age = c(30, 35, 28)
)

# inner_join(): rows whose ID is present in both inputs
inner_join_df <- df1 %>% inner_join(df2, by = "ID")
print("Inner Join Data Frame:")
inner_join_df

# left_join(): every row of df1, with Age set to NA when df2 has no match
left_join_df <- df1 %>% left_join(df2, by = "ID")
print("Left Join Data Frame:")
left_join_df

# right_join(): every row of df2, with Name set to NA when df1 has no match
right_join_df <- df1 %>% right_join(df2, by = "ID")
print("Right Join Data Frame:")
right_join_df

# full_join(): every row of both inputs
full_join_df <- df1 %>% full_join(df2, by = "ID")
print("Full Outer Join Data Frame:")
full_join_df



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




[1] "Inner Join Data Frame:"


ID,Name,Age
<dbl>,<chr>,<dbl>
2,Bob,30
3,Charlie,35


[1] "Left Join Data Frame:"


ID,Name,Age
<dbl>,<chr>,<dbl>
1,Alice,
2,Bob,30.0
3,Charlie,35.0


[1] "Right Join Data Frame:"


ID,Name,Age
<dbl>,<chr>,<dbl>
2,Bob,30
3,Charlie,35
4,,28


[1] "Full Outer Join Data Frame:"


ID,Name,Age
<dbl>,<chr>,<dbl>
1,Alice,
2,Bob,30.0
3,Charlie,35.0
4,,28.0


<h3> Melting and Casting DataFrame </h3>


In [None]:
# Install reshape2 only when it is missing, so re-running this cell does not
# reinstall the package every time; then load it
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)

# Creating a wide-format data frame: one row per ID, one column per score
wide_data <- data.frame(
  ID = 1:3,
  Score1 = c(10, 20, 30),
  Score2 = c(15, 25, 35)
)
wide_data

# Melting to long format: ID stays as the identifier, and each score column
# becomes rows described by a (variable, value) pair
long_data <- melt(wide_data, id.vars = "ID")
long_data


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependency ‘plyr’




ID,Score1,Score2
<int>,<dbl>,<dbl>
1,10,15
2,20,25
3,30,35


ID,variable,value
<int>,<fct>,<dbl>
1,Score1,10
2,Score1,20
3,Score1,30
1,Score2,15
2,Score2,25
3,Score2,35


In [None]:
# Casting back to wide format: one row per ID and one column per level of
# `variable`. value.var names the column that supplies the cell values, so
# dcast() does not have to guess which column to use.
wide <- dcast(long_data, ID ~ variable, value.var = "value")
wide

ID,Score1,Score2
<int>,<dbl>,<dbl>
1,10,15
2,20,25
3,30,35


In [None]:
# Handling missing values: a numeric vector containing both NA and NaN
data <- c(1, 2, NA, NaN, 4, 5, NaN)
data

# is.na() flags NA as well as NaN
print(is.na(data))
# is.nan() flags only the NaN entries
print(is.nan(data))

[1] FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE
[1] FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE


In [None]:
# Drop the missing entries from `data`, keeping only the observed values
data1 <- na.omit(object = data)
data1

In [None]:
# Mean imputation: average the observed values, ignoring missing ones
Mean <- mean(data, na.rm = TRUE)
# Overwrite each missing position in `data` with that average
data <- replace(data, is.na(data), Mean)
data

In [None]:
# Missing values in a data frame: each column here contains one NA
df <- data.frame(
  A = c(2, 5, NA, 8),
  B = c(8, 1, 4, NA)
)

# complete.cases() is TRUE for the rows that have no missing value in any
# column; use it to keep just those rows
complete_rows <- complete.cases(df)
print(df[complete_rows, ])

  A B
1 2 8
2 5 1
