# .NET with Apache Spark Example
This notebook sets up .NET and Microsoft.Spark, builds a C# WordCount project, and runs it with Spark.

In [None]:
# Install .NET 7 SDK
!wget https://dot.net/v1/dotnet-install.sh -O dotnet-install.sh
!chmod +x dotnet-install.sh
!./dotnet-install.sh --channel 7.0
import os
os.environ['PATH'] += ':$HOME/.dotnet'

In [None]:
# Sanity check: print the installed .NET SDK version. This only succeeds if
# the `dotnet` executable can be found through the kernel's PATH.
!dotnet --version

In [None]:
# Install Apache Spark 4.0.1
!wget https://downloads.apache.org/spark/spark-4.0.1/spark-4.0.1-bin-hadoop3.tgz
!tar -xzf spark-4.0.1-bin-hadoop3.tgz
!mv spark-4.0.1-bin-hadoop3 /opt/spark

In [None]:
# Install Microsoft.Spark.Worker
!mkdir -p /opt/spark-worker
!dotnet tool install --global Microsoft.Spark.Worker --version 2.3.0
import os
os.environ['PATH'] += ':$HOME/.dotnet/tools'

In [None]:
# Create a WordCountSpark project
!dotnet new console -n WordCountSpark
!cd WordCountSpark && dotnet add package Microsoft.Spark --version 2.3.0

In [None]:
%%writefile WordCountSpark/Program.cs
// C# WordCount program for .NET for Apache Spark.
// (The %%writefile magic above must stay on the first line of the notebook cell;
// everything below it is written verbatim to Program.cs.)
using System;
using System.Linq;
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;

/// <summary>
/// Counts how often each space-separated word occurs in a text file and
/// prints the resulting table.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point.
    /// </summary>
    /// <param name="args">
    /// Optional: args[0] is the path of the text file to read.
    /// Defaults to "sample.txt" when no argument is given.
    /// </param>
    static void Main(string[] args)
    {
        string filename = args.Length > 0 ? args[0] : "sample.txt";
        var spark = SparkSession.Builder().AppName("WordCountCSharp").GetOrCreate();
        try
        {
            // The text reader yields one row per input line in a column named "value".
            DataFrame df = spark.Read().Text(filename);

            // Split each line on single spaces and emit one row per word.
            DataFrame wordsDf = df.Select(Explode(Split(Col("value"), " ")).Alias("word"));

            DataFrame wordCounts = wordsDf.GroupBy("word").Count();
            wordCounts.Show();
        }
        finally
        {
            // Always stop the session, even when reading or processing fails.
            spark.Stop();
        }
    }
}

In [None]:
# Create a one-line sample input file, sample.txt, in the notebook's current
# working directory (not inside the WordCountSpark project directory).
!echo 'hello world hello spark' > sample.txt

In [None]:
# Build project
!cd WordCountSpark && dotnet build -c Release

In [None]:
# Run Spark job
!cd WordCountSpark && /opt/spark/bin/spark-submit \
  --class org.apache.spark.deploy.DotnetRunner \
  --master local \
  $HOME/.dotnet/tools/Microsoft.Spark.Worker \
  bin/Release/net7.0/WordCountSpark.dll sample.txt