Skip to content

Commit

Permalink
vrouter: implement packet handling on Hyper-V
Browse files Browse the repository at this point in the history
Changes required to support Contrail on Windows:
- windows/vr_nbl.c - code responsible for handling incoming
  NET_BUFFER_LIST (Windows' internal packet representation)
- windows/vr_host_interface.c - implementation of
  vr_host_interface_ops callback layer on Windows

Initial work:
  https://github.com/codilime/contrail-vrouter/commits/windows

Change-Id: Icadff011051c0f44719d1a08e2e8042fa732e7b8
Partial-Bug: #1734699
  • Loading branch information
Dariusz Sosnowski committed Dec 11, 2017
1 parent 7e1320e commit e774954
Show file tree
Hide file tree
Showing 2 changed files with 847 additions and 0 deletions.
358 changes: 358 additions & 0 deletions windows/vr_host_interface.c
@@ -0,0 +1,358 @@
/*
* Copyright (c) 2017 Juniper Networks, Inc. All rights reserved.
*/
#include "precomp.h"

#include "vr_interface.h"
#include "vr_packet.h"
#include "vr_windows.h"
#include "vrouter.h"
#include "windows_devices.h"
#include "windows_nbl.h"

static NDIS_MUTEX win_if_mutex;

void
win_if_lock(void)
{
NDIS_WAIT_FOR_MUTEX(&win_if_mutex);
}

void
win_if_unlock(void)
{
NDIS_RELEASE_MUTEX(&win_if_mutex);
}

static int
win_if_add(struct vr_interface* vif)
{
if (vif->vif_type == VIF_TYPE_STATS)
return 0;

if (vif->vif_name[0] == '\0')
return -ENODEV;

// Unlike FreeBSD/Linux, we don't have to register handlers here

return 0;
}

static int
win_if_add_tap(struct vr_interface* vif)
{
UNREFERENCED_PARAMETER(vif);
// NOOP - no bridges on Windows
return 0;
}

static int
win_if_del(struct vr_interface *vif)
{
UNREFERENCED_PARAMETER(vif);
return 0;
}

static int
win_if_del_tap(struct vr_interface *vif)
{
UNREFERENCED_PARAMETER(vif);
// NOOP - no bridges on Windows; most *_drv_del function which call if_del_tap
// also call if_del
return 0;
}

static uint16_t
trim_csum(uint32_t csum)
{
while (csum & 0xffff0000)
csum = (csum >> 16) + (csum & 0x0000ffff);

return (uint16_t)csum;
}

static uint16_t
calc_csum(uint8_t* ptr, size_t size)
{
uint32_t csum = 0;
// Checksum based on payload
for (int i = 0; i < size; i++)
{
if (i & 1)
csum += ptr[i];
else
csum += ptr[i] << 8;
}

return trim_csum(csum);
}

static void
fix_ip_csum_at_offset(struct vr_packet *pkt, unsigned offset)
{
struct vr_ip *iph;

ASSERT(0 < offset);

iph = (struct vr_ip *)pkt_data_at_offset(pkt, offset);
iph->ip_csum = vr_ip_csum(iph);
}

static void
zero_ip_csum_at_offset(struct vr_packet *pkt, unsigned offset)
{
struct vr_ip *iph;

ASSERT(0 < offset);

iph = (struct vr_ip *)pkt_data_at_offset(pkt, offset);
iph->ip_csum = 0;
}

static bool fix_csum(struct vr_packet *pkt, unsigned offset)
{
uint32_t csum;
uint16_t size;
uint8_t type;

PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list;
PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);

void* packet_data_buffer = ExAllocatePoolWithTag(NonPagedPoolNx, NET_BUFFER_DATA_LENGTH(nb), VrAllocationTag);

// Copy the packet. This function will not fail if ExAllocatePoolWithTag succeeded
// So no need to clean it up
// If ExAllocatePoolWithTag failed (packet_data_buffer== NULL),
// this function will work okay if the data is contigous.
uint8_t* packet_data = NdisGetDataBuffer(nb, NET_BUFFER_DATA_LENGTH(nb), packet_data_buffer, 1, 0);

if (packet_data == NULL)
// No need for free
return false;

if (pkt->vp_type == VP_TYPE_IP6 || pkt->vp_type == VP_TYPE_IP6OIP) {
struct vr_ip6 *hdr = (struct vr_ip6*) (packet_data + offset);
offset += sizeof(struct vr_ip6);
size = ntohs(hdr->PayloadLength);

type = hdr->NextHeader;
} else {
struct vr_ip *hdr = (struct vr_ip*) &packet_data[offset];
offset += hdr->ip_hl * 4;
size = ntohs(hdr->ip_len) - 4 * hdr->ip_hl;

type = hdr->ip_proto;
}

uint8_t* payload = &packet_data[offset];
csum = calc_csum((uint8_t*) payload, size);

// This time it's the "real" packet. Header being contiguous is guaranteed, but nothing else
if (type == VR_IP_PROTO_UDP) {
struct vr_udp* udp = (struct vr_udp*) pkt_data_at_offset(pkt, offset);
udp->udp_csum = htons(~(trim_csum(csum)));
} else if (type == VR_IP_PROTO_TCP) {
struct vr_tcp* tcp = (struct vr_tcp*) pkt_data_at_offset(pkt, offset);
tcp->tcp_csum = htons(~(trim_csum(csum)));
}

if (packet_data_buffer)
ExFreePool(packet_data_buffer);

return true;
}

static void
fix_tunneled_csum(struct vr_packet *pkt)
{
PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list;
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO settings;
settings.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);

if (settings.Transmit.IpHeaderChecksum) {
// Zero the outer checksum, it'll be offloaded
zero_ip_csum_at_offset(pkt, sizeof(struct vr_eth));
// Fix the inner checksum, it will not be offloaded
fix_ip_csum_at_offset(pkt, pkt->vp_inner_network_h);
} else {
// Fix the outer checksum
fix_ip_csum_at_offset(pkt, sizeof(struct vr_eth));
// Inner checksum is OK
}

if (settings.Transmit.TcpChecksum) {
// Calculate the header/data csum and turn off HW acceleration
if (fix_csum(pkt, pkt->vp_inner_network_h)) {
settings.Transmit.TcpChecksum = 0;
settings.Transmit.TcpHeaderOffset = 0;
NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo) = settings.Value;
}
// else try to offload it even though it's tunneled.
}

if (settings.Transmit.UdpChecksum) {
// Calculate the header/data csum and turn off HW acceleration
if (fix_csum(pkt, pkt->vp_inner_network_h)) {
settings.Transmit.UdpChecksum = 0;
NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo) = settings.Value;
}
// else try to offload it even though it's tunneled.
}
}

static int
__win_if_tx(struct vr_interface *vif, struct vr_packet *pkt)
{
if (vr_pkt_type_is_overlay(pkt->vp_type))
fix_tunneled_csum(pkt);

PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list;

NDIS_SWITCH_PORT_DESTINATION newDestination = { 0 };

newDestination.PortId = vif->vif_port;
newDestination.NicIndex = vif->vif_nic;
DbgPrint("Adding target, PID: %u, NID: %u\r\n", newDestination.PortId, newDestination.NicIndex);

VrSwitchObject->NdisSwitchHandlers.AddNetBufferListDestination(VrSwitchObject->NdisSwitchContext, nbl, &newDestination);

PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwd = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
fwd->IsPacketDataSafe = TRUE;

NdisAdvanceNetBufferListDataStart(nbl, pkt->vp_data, TRUE, NULL);

NdisFSendNetBufferLists(VrSwitchObject->NdisFilterHandle,
nbl,
NDIS_DEFAULT_PORT_NUMBER,
0);

return 0;
}

static int
win_if_tx(struct vr_interface *vif, struct vr_packet* pkt)
{
DbgPrint("%s: Got pkt\n", __func__);
if (vif == NULL) {
FreeNetBufferList(pkt->vp_net_buffer_list);
return 0; // Sent into /dev/null
}

if (vif->vif_type == VIF_TYPE_AGENT)
return pkt0_if_tx(vif, pkt);
else
return __win_if_tx(vif, pkt);
}

static int
win_if_rx(struct vr_interface *vif, struct vr_packet* pkt)
{
DbgPrint("%s: Got pkt\n", __func__);

// Since we are operating from virtual switch's PoV and not from OS's PoV, RXing is the same as TXing
// On Linux, we receive the packet as an OS, but in Windows we are a switch to we simply push the packet to OS's networking stack
// See vhost_tx for reference (it calls hif_ops->hif_rx)

win_if_tx(vif, pkt);

return 0;
}

static int
win_if_get_settings(struct vr_interface *vif, struct vr_interface_settings *settings)
{
UNREFERENCED_PARAMETER(vif);
UNREFERENCED_PARAMETER(settings);

/* TODO: Implement */
DbgPrint("%s(): dummy implementation called\n", __func__);

return -EINVAL;
}

static unsigned int
win_if_get_mtu(struct vr_interface *vif)
{
UNREFERENCED_PARAMETER(vif);

/* TODO: Implement */
DbgPrint("%s(): dummy implementation called\n", __func__);

return vif->vif_mtu;
}

static unsigned short
win_if_get_encap(struct vr_interface *vif)
{
UNREFERENCED_PARAMETER(vif);

/* TODO: Implement */
DbgPrint("%s(): dummy implementation called\n", __func__);

return VIF_ENCAP_TYPE_ETHER;
}

static struct vr_host_interface_ops win_host_interface_ops = {
.hif_lock = win_if_lock,
.hif_unlock = win_if_unlock,
.hif_add = win_if_add,
.hif_del = win_if_del,
.hif_add_tap = win_if_add_tap,
.hif_del_tap = win_if_del_tap,
.hif_tx = win_if_tx,
.hif_rx = win_if_rx,
.hif_get_settings = win_if_get_settings,
.hif_get_mtu = win_if_get_mtu,
.hif_get_encap = win_if_get_encap,
.hif_stats_update = NULL,
};

void
vr_host_vif_init(struct vrouter *router)
{
UNREFERENCED_PARAMETER(router);
}

void
vr_host_interface_exit(void)
{
/* Noop */
}

void
vhost_xconnect(void)
{
struct vrouter *vrouter = vrouter_get(0);
struct vr_interface *host_if;

if (vrouter->vr_host_if != NULL) {
host_if = vrouter->vr_host_if;
vif_set_xconnect(host_if);

if (host_if->vif_bridge != NULL)
vif_set_xconnect(host_if->vif_bridge);
}
}

void
vhost_remove_xconnect(void)
{
struct vrouter *vrouter = vrouter_get(0);
struct vr_interface *host_if;

if (vrouter->vr_host_if != NULL) {
host_if = vrouter->vr_host_if;
vif_remove_xconnect(host_if);

if (host_if->vif_bridge != NULL)
vif_remove_xconnect(host_if->vif_bridge);
}
}

struct vr_host_interface_ops *
vr_host_interface_init(void)
{
NDIS_INIT_MUTEX(&win_if_mutex);

return &win_host_interface_ops;
}

0 comments on commit e774954

Please sign in to comment.